author     2021-02-12 22:22:18 -0800
committer  2021-02-12 22:22:18 -0800
commit     d3c7a7e7cf4bcabb171c98fe55e6e0291f8ee980 (patch)
tree       5c900d10847e1768a4951c1e6bec35f2618b5991 /src/video_core/renderer_vulkan
parent     Merge pull request #5877 from ameerj/res-limit-usage (diff)
parent     config: Make high GPU accuracy the default (diff)
Merge pull request #5741 from ReinUsesLisp/new-bufcache
video_core: Reimplement the buffer cache
Diffstat (limited to 'src/video_core/renderer_vulkan')
25 files changed, 921 insertions, 1181 deletions
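Reading note (not part of the patch itself): the bulk of this merge replaces the old VKBufferCache/Buffer pair, built on VideoCommon::BufferBlock and a Vulkan stream buffer, with the shared VideoCommon::BufferCache template, specialized through BufferCacheParams and a new BufferCacheRuntime that owns the staging requests, the quad-index lookup table, and the uint8/quad-index compute passes. The RendererVulkan changes are mostly fallout from that: its members become plain values constructed in a function-try-block constructor, so Init(), ShutDown(), InitializeDevice(), and EnumerateDevices() disappear from the renderer.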
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 85121d9fd..19aaf034f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
| @@ -531,13 +531,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { | |||
| 531 | return {}; | 531 | return {}; |
| 532 | } | 532 | } |
| 533 | 533 | ||
| 534 | VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) { | 534 | VkIndexType IndexFormat(Maxwell::IndexFormat index_format) { |
| 535 | switch (index_format) { | 535 | switch (index_format) { |
| 536 | case Maxwell::IndexFormat::UnsignedByte: | 536 | case Maxwell::IndexFormat::UnsignedByte: |
| 537 | if (!device.IsExtIndexTypeUint8Supported()) { | ||
| 538 | UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device"); | ||
| 539 | return VK_INDEX_TYPE_UINT16; | ||
| 540 | } | ||
| 541 | return VK_INDEX_TYPE_UINT8_EXT; | 537 | return VK_INDEX_TYPE_UINT8_EXT; |
| 542 | case Maxwell::IndexFormat::UnsignedShort: | 538 | case Maxwell::IndexFormat::UnsignedShort: |
| 543 | return VK_INDEX_TYPE_UINT16; | 539 | return VK_INDEX_TYPE_UINT16; |
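With the device parameter dropped, IndexFormat no longer downgrades uint8 indices itself; the fallback for hardware without VK_EXT_index_type_uint8 now happens at bind time in BufferCacheRuntime::BindIndexBuffer (further down in this diff), which runs the indices through Uint8Pass instead. As a rough, self-contained illustration (not the actual compute shader), the pass effectively performs this widening, just on the GPU:

    #include <cstdint>
    #include <vector>

    // Illustration only: widen 8-bit indices to 16 bits so the draw can use
    // VK_INDEX_TYPE_UINT16 when the uint8 index extension is unavailable.
    std::vector<uint16_t> WidenUint8Indices(const std::vector<uint8_t>& indices) {
        return std::vector<uint16_t>(indices.begin(), indices.end());
    }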
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7c34b47dc..e3e06ba38 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
| @@ -53,7 +53,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib | |||
| 53 | 53 | ||
| 54 | VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); | 54 | VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); |
| 55 | 55 | ||
| 56 | VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format); | 56 | VkIndexType IndexFormat(Maxwell::IndexFormat index_format); |
| 57 | 57 | ||
| 58 | VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); | 58 | VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); |
| 59 | 59 | ||
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 61796e33a..1cc720ddd 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
| @@ -80,17 +80,50 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext | |||
| 80 | return separated_extensions; | 80 | return separated_extensions; |
| 81 | } | 81 | } |
| 82 | 82 | ||
| 83 | Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, | ||
| 84 | VkSurfaceKHR surface) { | ||
| 85 | const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); | ||
| 86 | const s32 device_index = Settings::values.vulkan_device.GetValue(); | ||
| 87 | if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { | ||
| 88 | LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); | ||
| 89 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||
| 90 | } | ||
| 91 | const vk::PhysicalDevice physical_device(devices[device_index], dld); | ||
| 92 | return Device(*instance, physical_device, surface, dld); | ||
| 93 | } | ||
| 83 | } // Anonymous namespace | 94 | } // Anonymous namespace |
| 84 | 95 | ||
| 85 | RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | 96 | RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, |
| 86 | Core::Frontend::EmuWindow& emu_window, | 97 | Core::Frontend::EmuWindow& emu_window, |
| 87 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | 98 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, |
| 88 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) | 99 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) try |
| 89 | : RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_}, | 100 | : RendererBase(emu_window, std::move(context_)), |
| 90 | cpu_memory{cpu_memory_}, gpu{gpu_} {} | 101 | telemetry_session(telemetry_session_), |
| 102 | cpu_memory(cpu_memory_), | ||
| 103 | gpu(gpu_), | ||
| 104 | library(OpenLibrary()), | ||
| 105 | instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | ||
| 106 | true, Settings::values.renderer_debug)), | ||
| 107 | debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), | ||
| 108 | surface(CreateSurface(instance, render_window)), | ||
| 109 | device(CreateDevice(instance, dld, *surface)), | ||
| 110 | memory_allocator(device, false), | ||
| 111 | state_tracker(gpu), | ||
| 112 | scheduler(device, state_tracker), | ||
| 113 | swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, | ||
| 114 | render_window.GetFramebufferLayout().height, false), | ||
| 115 | blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, | ||
| 116 | screen_info), | ||
| 117 | rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device, | ||
| 118 | memory_allocator, state_tracker, scheduler) { | ||
| 119 | Report(); | ||
| 120 | } catch (const vk::Exception& exception) { | ||
| 121 | LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); | ||
| 122 | throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())}; | ||
| 123 | } | ||
| 91 | 124 | ||
| 92 | RendererVulkan::~RendererVulkan() { | 125 | RendererVulkan::~RendererVulkan() { |
| 93 | ShutDown(); | 126 | void(device.GetLogical().WaitIdle()); |
| 94 | } | 127 | } |
| 95 | 128 | ||
| 96 | void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 129 | void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
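A note on the constructor above: the `try` after the parameter list is a function-try-block, which is what allows the member-initializer list (instance, device, swapchain creation, and so on) to be covered by the catch clause that rethrows vk::Exception as std::runtime_error. A minimal sketch of the mechanism with placeholder types:

    #include <cstdio>
    #include <stdexcept>
    #include <string>

    struct Resource {
        Resource() { throw std::runtime_error("acquire failed"); }  // placeholder failure
    };

    struct Holder {
        // The function-try-block covers the member initializers; if one throws,
        // the handler runs and an exception always leaves the constructor, so
        // the object is never considered constructed.
        Holder() try : resource() {
        } catch (const std::exception& e) {
            throw std::runtime_error(std::string("init error: ") + e.what());
        }
        Resource resource;
    };

    int main() {
        try {
            Holder holder;
        } catch (const std::exception& e) {
            std::puts(e.what());  // prints "init error: acquire failed"
        }
    }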
| @@ -101,101 +134,38 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 101 | if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { | 134 | if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { |
| 102 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; | 135 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; |
| 103 | const bool use_accelerated = | 136 | const bool use_accelerated = |
| 104 | rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); | 137 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); |
| 105 | const bool is_srgb = use_accelerated && screen_info.is_srgb; | 138 | const bool is_srgb = use_accelerated && screen_info.is_srgb; |
| 106 | if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) { | 139 | if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) { |
| 107 | swapchain->Create(layout.width, layout.height, is_srgb); | 140 | swapchain.Create(layout.width, layout.height, is_srgb); |
| 108 | blit_screen->Recreate(); | 141 | blit_screen.Recreate(); |
| 109 | } | 142 | } |
| 110 | 143 | ||
| 111 | scheduler->WaitWorker(); | 144 | scheduler.WaitWorker(); |
| 112 | 145 | ||
| 113 | swapchain->AcquireNextImage(); | 146 | swapchain.AcquireNextImage(); |
| 114 | const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated); | 147 | const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); |
| 115 | 148 | ||
| 116 | scheduler->Flush(render_semaphore); | 149 | scheduler.Flush(render_semaphore); |
| 117 | 150 | ||
| 118 | if (swapchain->Present(render_semaphore)) { | 151 | if (swapchain.Present(render_semaphore)) { |
| 119 | blit_screen->Recreate(); | 152 | blit_screen.Recreate(); |
| 120 | } | 153 | } |
| 121 | 154 | rasterizer.TickFrame(); | |
| 122 | rasterizer->TickFrame(); | ||
| 123 | } | 155 | } |
| 124 | 156 | ||
| 125 | render_window.OnFrameDisplayed(); | 157 | render_window.OnFrameDisplayed(); |
| 126 | } | 158 | } |
| 127 | 159 | ||
| 128 | bool RendererVulkan::Init() try { | ||
| 129 | library = OpenLibrary(); | ||
| 130 | instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | ||
| 131 | true, Settings::values.renderer_debug); | ||
| 132 | if (Settings::values.renderer_debug) { | ||
| 133 | debug_callback = CreateDebugCallback(instance); | ||
| 134 | } | ||
| 135 | surface = CreateSurface(instance, render_window); | ||
| 136 | |||
| 137 | InitializeDevice(); | ||
| 138 | Report(); | ||
| 139 | |||
| 140 | memory_allocator = std::make_unique<MemoryAllocator>(*device); | ||
| 141 | |||
| 142 | state_tracker = std::make_unique<StateTracker>(gpu); | ||
| 143 | |||
| 144 | scheduler = std::make_unique<VKScheduler>(*device, *state_tracker); | ||
| 145 | |||
| 146 | const auto& framebuffer = render_window.GetFramebufferLayout(); | ||
| 147 | swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler); | ||
| 148 | swapchain->Create(framebuffer.width, framebuffer.height, false); | ||
| 149 | |||
| 150 | rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(), | ||
| 151 | cpu_memory, screen_info, *device, | ||
| 152 | *memory_allocator, *state_tracker, *scheduler); | ||
| 153 | |||
| 154 | blit_screen = | ||
| 155 | std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device, | ||
| 156 | *memory_allocator, *swapchain, *scheduler, screen_info); | ||
| 157 | return true; | ||
| 158 | |||
| 159 | } catch (const vk::Exception& exception) { | ||
| 160 | LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); | ||
| 161 | return false; | ||
| 162 | } | ||
| 163 | |||
| 164 | void RendererVulkan::ShutDown() { | ||
| 165 | if (!device) { | ||
| 166 | return; | ||
| 167 | } | ||
| 168 | if (const auto& dev = device->GetLogical()) { | ||
| 169 | dev.WaitIdle(); | ||
| 170 | } | ||
| 171 | rasterizer.reset(); | ||
| 172 | blit_screen.reset(); | ||
| 173 | scheduler.reset(); | ||
| 174 | swapchain.reset(); | ||
| 175 | memory_allocator.reset(); | ||
| 176 | device.reset(); | ||
| 177 | } | ||
| 178 | |||
| 179 | void RendererVulkan::InitializeDevice() { | ||
| 180 | const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); | ||
| 181 | const s32 device_index = Settings::values.vulkan_device.GetValue(); | ||
| 182 | if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { | ||
| 183 | LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); | ||
| 184 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||
| 185 | } | ||
| 186 | const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld); | ||
| 187 | device = std::make_unique<Device>(*instance, physical_device, *surface, dld); | ||
| 188 | } | ||
| 189 | |||
| 190 | void RendererVulkan::Report() const { | 160 | void RendererVulkan::Report() const { |
| 191 | const std::string vendor_name{device->GetVendorName()}; | 161 | const std::string vendor_name{device.GetVendorName()}; |
| 192 | const std::string model_name{device->GetModelName()}; | 162 | const std::string model_name{device.GetModelName()}; |
| 193 | const std::string driver_version = GetDriverVersion(*device); | 163 | const std::string driver_version = GetDriverVersion(device); |
| 194 | const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version); | 164 | const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version); |
| 195 | 165 | ||
| 196 | const std::string api_version = GetReadableVersion(device->ApiVersion()); | 166 | const std::string api_version = GetReadableVersion(device.ApiVersion()); |
| 197 | 167 | ||
| 198 | const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions()); | 168 | const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions()); |
| 199 | 169 | ||
| 200 | LOG_INFO(Render_Vulkan, "Driver: {}", driver_name); | 170 | LOG_INFO(Render_Vulkan, "Driver: {}", driver_name); |
| 201 | LOG_INFO(Render_Vulkan, "Device: {}", model_name); | 171 | LOG_INFO(Render_Vulkan, "Device: {}", model_name); |
| @@ -209,21 +179,4 @@ void RendererVulkan::Report() const { | |||
| 209 | telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); | 179 | telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); |
| 210 | } | 180 | } |
| 211 | 181 | ||
| 212 | std::vector<std::string> RendererVulkan::EnumerateDevices() try { | ||
| 213 | vk::InstanceDispatch dld; | ||
| 214 | const Common::DynamicLibrary library = OpenLibrary(); | ||
| 215 | const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0); | ||
| 216 | const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices(); | ||
| 217 | std::vector<std::string> names; | ||
| 218 | names.reserve(physical_devices.size()); | ||
| 219 | for (const VkPhysicalDevice device : physical_devices) { | ||
| 220 | names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName); | ||
| 221 | } | ||
| 222 | return names; | ||
| 223 | |||
| 224 | } catch (const vk::Exception& exception) { | ||
| 225 | LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what()); | ||
| 226 | return {}; | ||
| 227 | } | ||
| 228 | |||
| 229 | } // namespace Vulkan | 182 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index daf55b9b4..72071316c 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
| @@ -9,8 +9,14 @@ | |||
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include "common/dynamic_library.h" | 11 | #include "common/dynamic_library.h" |
| 12 | |||
| 13 | #include "video_core/renderer_base.h" | 12 | #include "video_core/renderer_base.h" |
| 13 | #include "video_core/renderer_vulkan/vk_blit_screen.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_swapchain.h" | ||
| 18 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 19 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 20 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 15 | 21 | ||
| 16 | namespace Core { | 22 | namespace Core { |
| @@ -27,20 +33,6 @@ class GPU; | |||
| 27 | 33 | ||
| 28 | namespace Vulkan { | 34 | namespace Vulkan { |
| 29 | 35 | ||
| 30 | class Device; | ||
| 31 | class StateTracker; | ||
| 32 | class MemoryAllocator; | ||
| 33 | class VKBlitScreen; | ||
| 34 | class VKSwapchain; | ||
| 35 | class VKScheduler; | ||
| 36 | |||
| 37 | struct VKScreenInfo { | ||
| 38 | VkImageView image_view{}; | ||
| 39 | u32 width{}; | ||
| 40 | u32 height{}; | ||
| 41 | bool is_srgb{}; | ||
| 42 | }; | ||
| 43 | |||
| 44 | class RendererVulkan final : public VideoCore::RendererBase { | 36 | class RendererVulkan final : public VideoCore::RendererBase { |
| 45 | public: | 37 | public: |
| 46 | explicit RendererVulkan(Core::TelemetrySession& telemtry_session, | 38 | explicit RendererVulkan(Core::TelemetrySession& telemtry_session, |
| @@ -49,15 +41,13 @@ public: | |||
| 49 | std::unique_ptr<Core::Frontend::GraphicsContext> context_); | 41 | std::unique_ptr<Core::Frontend::GraphicsContext> context_); |
| 50 | ~RendererVulkan() override; | 42 | ~RendererVulkan() override; |
| 51 | 43 | ||
| 52 | bool Init() override; | ||
| 53 | void ShutDown() override; | ||
| 54 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | 44 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; |
| 55 | 45 | ||
| 56 | static std::vector<std::string> EnumerateDevices(); | 46 | VideoCore::RasterizerInterface* ReadRasterizer() override { |
| 47 | return &rasterizer; | ||
| 48 | } | ||
| 57 | 49 | ||
| 58 | private: | 50 | private: |
| 59 | void InitializeDevice(); | ||
| 60 | |||
| 61 | void Report() const; | 51 | void Report() const; |
| 62 | 52 | ||
| 63 | Core::TelemetrySession& telemetry_session; | 53 | Core::TelemetrySession& telemetry_session; |
| @@ -68,18 +58,18 @@ private: | |||
| 68 | vk::InstanceDispatch dld; | 58 | vk::InstanceDispatch dld; |
| 69 | 59 | ||
| 70 | vk::Instance instance; | 60 | vk::Instance instance; |
| 71 | 61 | vk::DebugUtilsMessenger debug_callback; | |
| 72 | vk::SurfaceKHR surface; | 62 | vk::SurfaceKHR surface; |
| 73 | 63 | ||
| 74 | VKScreenInfo screen_info; | 64 | VKScreenInfo screen_info; |
| 75 | 65 | ||
| 76 | vk::DebugUtilsMessenger debug_callback; | 66 | Device device; |
| 77 | std::unique_ptr<Device> device; | 67 | MemoryAllocator memory_allocator; |
| 78 | std::unique_ptr<MemoryAllocator> memory_allocator; | 68 | StateTracker state_tracker; |
| 79 | std::unique_ptr<StateTracker> state_tracker; | 69 | VKScheduler scheduler; |
| 80 | std::unique_ptr<VKScheduler> scheduler; | 70 | VKSwapchain swapchain; |
| 81 | std::unique_ptr<VKSwapchain> swapchain; | 71 | VKBlitScreen blit_screen; |
| 82 | std::unique_ptr<VKBlitScreen> blit_screen; | 72 | RasterizerVulkan rasterizer; |
| 83 | }; | 73 | }; |
| 84 | 74 | ||
| 85 | } // namespace Vulkan | 75 | } // namespace Vulkan |
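Since the header now holds these objects by value rather than through std::unique_ptr, construction order is fixed by the declaration order above (instance, then surface, then device, then memory_allocator, and so on down to rasterizer), which is what makes the long initializer list in renderer_vulkan.cpp safe. A small reminder of the language rule, because the order written in an initializer list does not matter:

    #include <iostream>

    struct First  { First()  { std::cout << "First\n"; } };
    struct Second { Second() { std::cout << "Second\n"; } };

    struct Owner {
        // Members are constructed in declaration order even though the
        // initializer list names them the other way around (compilers warn
        // about this with -Wreorder).
        Owner() : second(), first() {}
        First first;
        Second second;
    };

    int main() {
        Owner owner;  // prints "First" then "Second"
    }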
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 3e3b895e0..a1a32aabe 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
| @@ -18,7 +18,6 @@ | |||
| 18 | #include "video_core/gpu.h" | 18 | #include "video_core/gpu.h" |
| 19 | #include "video_core/host_shaders/vulkan_present_frag_spv.h" | 19 | #include "video_core/host_shaders/vulkan_present_frag_spv.h" |
| 20 | #include "video_core/host_shaders/vulkan_present_vert_spv.h" | 20 | #include "video_core/host_shaders/vulkan_present_vert_spv.h" |
| 21 | #include "video_core/rasterizer_interface.h" | ||
| 22 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 21 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 23 | #include "video_core/renderer_vulkan/vk_blit_screen.h" | 22 | #include "video_core/renderer_vulkan/vk_blit_screen.h" |
| 24 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | 23 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" |
| @@ -113,13 +112,12 @@ struct VKBlitScreen::BufferData { | |||
| 113 | }; | 112 | }; |
| 114 | 113 | ||
| 115 | VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, | 114 | VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, |
| 116 | Core::Frontend::EmuWindow& render_window_, | 115 | Core::Frontend::EmuWindow& render_window_, const Device& device_, |
| 117 | VideoCore::RasterizerInterface& rasterizer_, const Device& device_, | ||
| 118 | MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_, | 116 | MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_, |
| 119 | VKScheduler& scheduler_, const VKScreenInfo& screen_info_) | 117 | VKScheduler& scheduler_, const VKScreenInfo& screen_info_) |
| 120 | : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, | 118 | : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, |
| 121 | device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_}, | 119 | memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_}, |
| 122 | scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { | 120 | image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { |
| 123 | resource_ticks.resize(image_count); | 121 | resource_ticks.resize(image_count); |
| 124 | 122 | ||
| 125 | CreateStaticResources(); | 123 | CreateStaticResources(); |
| @@ -150,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 150 | SetUniformData(data, framebuffer); | 148 | SetUniformData(data, framebuffer); |
| 151 | SetVertexData(data, framebuffer); | 149 | SetVertexData(data, framebuffer); |
| 152 | 150 | ||
| 153 | const std::span<u8> map = buffer_commit.Map(); | 151 | const std::span<u8> mapped_span = buffer_commit.Map(); |
| 154 | std::memcpy(map.data(), &data, sizeof(data)); | 152 | std::memcpy(mapped_span.data(), &data, sizeof(data)); |
| 155 | 153 | ||
| 156 | if (!use_accelerated) { | 154 | if (!use_accelerated) { |
| 157 | const u64 image_offset = GetRawImageOffset(framebuffer, image_index); | 155 | const u64 image_offset = GetRawImageOffset(framebuffer, image_index); |
| @@ -159,14 +157,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 159 | const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; | 157 | const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; |
| 160 | const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); | 158 | const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); |
| 161 | const size_t size_bytes = GetSizeInBytes(framebuffer); | 159 | const size_t size_bytes = GetSizeInBytes(framebuffer); |
| 162 | rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes); | ||
| 163 | 160 | ||
| 164 | // TODO(Rodrigo): Read this from HLE | 161 | // TODO(Rodrigo): Read this from HLE |
| 165 | constexpr u32 block_height_log2 = 4; | 162 | constexpr u32 block_height_log2 = 4; |
| 166 | const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); | 163 | const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); |
| 167 | Tegra::Texture::UnswizzleTexture( | 164 | Tegra::Texture::UnswizzleTexture( |
| 168 | map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel, | 165 | mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), |
| 169 | framebuffer.width, framebuffer.height, 1, block_height_log2, 0); | 166 | bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); |
| 170 | 167 | ||
| 171 | const VkBufferImageCopy copy{ | 168 | const VkBufferImageCopy copy{ |
| 172 | .bufferOffset = image_offset, | 169 | .bufferOffset = image_offset, |
| @@ -266,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 266 | cmdbuf.Draw(4, 1, 0, 0); | 263 | cmdbuf.Draw(4, 1, 0, 0); |
| 267 | cmdbuf.EndRenderPass(); | 264 | cmdbuf.EndRenderPass(); |
| 268 | }); | 265 | }); |
| 269 | |||
| 270 | return *semaphores[image_index]; | 266 | return *semaphores[image_index]; |
| 271 | } | 267 | } |
| 272 | 268 | ||
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index b52576957..5e3177685 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
| @@ -38,12 +38,18 @@ class RasterizerVulkan; | |||
| 38 | class VKScheduler; | 38 | class VKScheduler; |
| 39 | class VKSwapchain; | 39 | class VKSwapchain; |
| 40 | 40 | ||
| 41 | class VKBlitScreen final { | 41 | struct VKScreenInfo { |
| 42 | VkImageView image_view{}; | ||
| 43 | u32 width{}; | ||
| 44 | u32 height{}; | ||
| 45 | bool is_srgb{}; | ||
| 46 | }; | ||
| 47 | |||
| 48 | class VKBlitScreen { | ||
| 42 | public: | 49 | public: |
| 43 | explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, | 50 | explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, |
| 44 | Core::Frontend::EmuWindow& render_window, | 51 | Core::Frontend::EmuWindow& render_window, const Device& device, |
| 45 | VideoCore::RasterizerInterface& rasterizer, const Device& device, | 52 | MemoryAllocator& memory_manager, VKSwapchain& swapchain, |
| 46 | MemoryAllocator& memory_allocator, VKSwapchain& swapchain, | ||
| 47 | VKScheduler& scheduler, const VKScreenInfo& screen_info); | 53 | VKScheduler& scheduler, const VKScreenInfo& screen_info); |
| 48 | ~VKBlitScreen(); | 54 | ~VKBlitScreen(); |
| 49 | 55 | ||
| @@ -84,7 +90,6 @@ private: | |||
| 84 | 90 | ||
| 85 | Core::Memory::Memory& cpu_memory; | 91 | Core::Memory::Memory& cpu_memory; |
| 86 | Core::Frontend::EmuWindow& render_window; | 92 | Core::Frontend::EmuWindow& render_window; |
| 87 | VideoCore::RasterizerInterface& rasterizer; | ||
| 88 | const Device& device; | 93 | const Device& device; |
| 89 | MemoryAllocator& memory_allocator; | 94 | MemoryAllocator& memory_allocator; |
| 90 | VKSwapchain& swapchain; | 95 | VKSwapchain& swapchain; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index d8ad40a0f..848eedd66 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
| @@ -3,188 +3,308 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | ||
| 6 | #include <cstring> | 7 | #include <cstring> |
| 7 | #include <memory> | 8 | #include <span> |
| 9 | #include <vector> | ||
| 8 | 10 | ||
| 9 | #include "core/core.h" | ||
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | 11 | #include "video_core/buffer_cache/buffer_cache.h" |
| 12 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 13 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 12 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 13 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | 15 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 16 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 14 | #include "video_core/vulkan_common/vulkan_device.h" | 17 | #include "video_core/vulkan_common/vulkan_device.h" |
| 18 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 15 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 19 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 16 | 20 | ||
| 17 | namespace Vulkan { | 21 | namespace Vulkan { |
| 18 | |||
| 19 | namespace { | 22 | namespace { |
| 23 | VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) { | ||
| 24 | return VkBufferCopy{ | ||
| 25 | .srcOffset = copy.src_offset, | ||
| 26 | .dstOffset = copy.dst_offset, | ||
| 27 | .size = copy.size, | ||
| 28 | }; | ||
| 29 | } | ||
| 20 | 30 | ||
| 21 | constexpr VkBufferUsageFlags BUFFER_USAGE = | 31 | VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) { |
| 22 | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | | 32 | if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) { |
| 23 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; | 33 | return VK_INDEX_TYPE_UINT8_EXT; |
| 24 | 34 | } | |
| 25 | constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE = | 35 | if (num_elements <= 0xffff) { |
| 26 | VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | | 36 | return VK_INDEX_TYPE_UINT16; |
| 27 | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | | 37 | } |
| 28 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; | 38 | return VK_INDEX_TYPE_UINT32; |
| 29 | 39 | } | |
| 30 | constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS = | ||
| 31 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | | ||
| 32 | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; | ||
| 33 | 40 | ||
| 34 | constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = | 41 | size_t BytesPerIndex(VkIndexType index_type) { |
| 35 | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; | 42 | switch (index_type) { |
| 43 | case VK_INDEX_TYPE_UINT8_EXT: | ||
| 44 | return 1; | ||
| 45 | case VK_INDEX_TYPE_UINT16: | ||
| 46 | return 2; | ||
| 47 | case VK_INDEX_TYPE_UINT32: | ||
| 48 | return 4; | ||
| 49 | default: | ||
| 50 | UNREACHABLE_MSG("Invalid index type={}", index_type); | ||
| 51 | return 1; | ||
| 52 | } | ||
| 53 | } | ||
| 36 | 54 | ||
| 55 | template <typename T> | ||
| 56 | std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) { | ||
| 57 | std::array<T, 6> indices{0, 1, 2, 0, 2, 3}; | ||
| 58 | std::ranges::transform(indices, indices.begin(), | ||
| 59 | [quad, first](u32 index) { return first + index + quad * 4; }); | ||
| 60 | return indices; | ||
| 61 | } | ||
| 37 | } // Anonymous namespace | 62 | } // Anonymous namespace |
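These helpers drive the quad translation: IndexTypeFromNumElements picks the narrowest index type the device supports, BytesPerIndex sizes it, and MakeQuadIndices expands each quad into the two triangles (0 1 2) and (0 2 3). A standalone copy for illustration, printing the lookup-table entry for quad 1 with a base offset of 0:

    #include <algorithm>
    #include <array>
    #include <cstdint>
    #include <iostream>

    // Standalone copy of MakeQuadIndices: quad 'quad' with base vertex 'first'
    // expands to indices first + 4 * quad + {0, 1, 2, 0, 2, 3}.
    template <typename T>
    std::array<T, 6> MakeQuadIndices(uint32_t quad, uint32_t first) {
        std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
        std::ranges::transform(indices, indices.begin(), [quad, first](T index) {
            return static_cast<T>(first + index + quad * 4);
        });
        return indices;
    }

    int main() {
        for (const uint16_t index : MakeQuadIndices<uint16_t>(1, 0)) {
            std::cout << index << ' ';  // prints: 4 5 6 4 6 7
        }
        std::cout << '\n';
    }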
| 38 | 63 | ||
| 39 | Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_, | 64 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) |
| 40 | StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) | 65 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} |
| 41 | : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ | 66 | |
| 42 | staging_pool_} { | 67 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, |
| 43 | buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | 68 | VAddr cpu_addr_, u64 size_bytes_) |
| 69 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { | ||
| 70 | buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 44 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 71 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 45 | .pNext = nullptr, | 72 | .pNext = nullptr, |
| 46 | .flags = 0, | 73 | .flags = 0, |
| 47 | .size = static_cast<VkDeviceSize>(size_), | 74 | .size = SizeBytes(), |
| 48 | .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | 75 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |
| 76 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | | ||
| 77 | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | ||
| 78 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | | ||
| 79 | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, | ||
| 49 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 80 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 50 | .queueFamilyIndexCount = 0, | 81 | .queueFamilyIndexCount = 0, |
| 51 | .pQueueFamilyIndices = nullptr, | 82 | .pQueueFamilyIndices = nullptr, |
| 52 | }); | 83 | }); |
| 53 | commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | 84 | if (runtime.device.HasDebuggingToolAttached()) { |
| 85 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); | ||
| 86 | } | ||
| 87 | commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | ||
| 54 | } | 88 | } |
| 55 | 89 | ||
| 56 | Buffer::~Buffer() = default; | 90 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, |
| 91 | VKScheduler& scheduler_, StagingBufferPool& staging_pool_, | ||
| 92 | VKUpdateDescriptorQueue& update_descriptor_queue_, | ||
| 93 | VKDescriptorPool& descriptor_pool) | ||
| 94 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | ||
| 95 | staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, | ||
| 96 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 97 | quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {} | ||
| 57 | 98 | ||
| 58 | void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { | 99 | StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) { |
| 59 | const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload); | 100 | return staging_pool.Request(size, MemoryUsage::Upload); |
| 60 | std::memcpy(staging.mapped_span.data(), data, data_size); | 101 | } |
| 61 | 102 | ||
| 62 | scheduler.RequestOutsideRenderPassOperationContext(); | 103 | StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) { |
| 104 | return staging_pool.Request(size, MemoryUsage::Download); | ||
| 105 | } | ||
| 63 | 106 | ||
| 64 | const VkBuffer handle = Handle(); | 107 | void BufferCacheRuntime::Finish() { |
| 65 | scheduler.Record([staging = staging.buffer, handle, offset, data_size, | 108 | scheduler.Finish(); |
| 66 | &device = device](vk::CommandBuffer cmdbuf) { | 109 | } |
| 67 | const VkBufferMemoryBarrier read_barrier{ | 110 | |
| 68 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | 111 | void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, |
| 69 | .pNext = nullptr, | 112 | std::span<const VideoCommon::BufferCopy> copies) { |
| 70 | .srcAccessMask = | 113 | static constexpr VkMemoryBarrier READ_BARRIER{ |
| 71 | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | | 114 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| 72 | VK_ACCESS_HOST_WRITE_BIT | | 115 | .pNext = nullptr, |
| 73 | (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0), | 116 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, |
| 74 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | 117 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, |
| 75 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 118 | }; |
| 76 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 119 | static constexpr VkMemoryBarrier WRITE_BARRIER{ |
| 77 | .buffer = handle, | 120 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| 78 | .offset = offset, | 121 | .pNext = nullptr, |
| 79 | .size = data_size, | 122 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 80 | }; | 123 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, |
| 81 | const VkBufferMemoryBarrier write_barrier{ | 124 | }; |
| 82 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | 125 | // Measuring a popular game, this number never exceeds the specified size once data is warmed up |
| 83 | .pNext = nullptr, | 126 | boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); |
| 84 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 127 | std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); |
| 85 | .dstAccessMask = UPLOAD_ACCESS_BARRIERS, | 128 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 86 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 129 | scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { |
| 87 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 88 | .buffer = handle, | ||
| 89 | .offset = offset, | ||
| 90 | .size = data_size, | ||
| 91 | }; | ||
| 92 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | 130 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 93 | 0, read_barrier); | 131 | 0, READ_BARRIER); |
| 94 | cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); | 132 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); |
| 95 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, | 133 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, |
| 96 | write_barrier); | 134 | 0, WRITE_BARRIER); |
| 97 | }); | 135 | }); |
| 98 | } | 136 | } |
| 99 | 137 | ||
| 100 | void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { | 138 | void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, |
| 101 | auto staging = staging_pool.Request(data_size, MemoryUsage::Download); | 139 | u32 base_vertex, u32 num_indices, VkBuffer buffer, |
| 102 | scheduler.RequestOutsideRenderPassOperationContext(); | 140 | u32 offset, [[maybe_unused]] u32 size) { |
| 141 | VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format); | ||
| 142 | VkDeviceSize vk_offset = offset; | ||
| 143 | VkBuffer vk_buffer = buffer; | ||
| 144 | if (topology == PrimitiveTopology::Quads) { | ||
| 145 | vk_index_type = VK_INDEX_TYPE_UINT32; | ||
| 146 | std::tie(vk_buffer, vk_offset) = | ||
| 147 | quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset); | ||
| 148 | } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { | ||
| 149 | vk_index_type = VK_INDEX_TYPE_UINT16; | ||
| 150 | std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset); | ||
| 151 | } | ||
| 152 | if (vk_buffer == VK_NULL_HANDLE) { | ||
| 153 | // Vulkan doesn't support null index buffers. Replace it with our own null buffer. | ||
| 154 | ReserveNullIndexBuffer(); | ||
| 155 | vk_buffer = *null_index_buffer; | ||
| 156 | } | ||
| 157 | scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) { | ||
| 158 | cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type); | ||
| 159 | }); | ||
| 160 | } | ||
| 103 | 161 | ||
| 104 | const VkBuffer handle = Handle(); | 162 | void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) { |
| 105 | scheduler.Record( | 163 | ReserveQuadArrayLUT(first + count, true); |
| 106 | [staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) { | ||
| 107 | const VkBufferMemoryBarrier barrier{ | ||
| 108 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | ||
| 109 | .pNext = nullptr, | ||
| 110 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 111 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 112 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 113 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 114 | .buffer = handle, | ||
| 115 | .offset = offset, | ||
| 116 | .size = data_size, | ||
| 117 | }; | ||
| 118 | |||
| 119 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | | ||
| 120 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | | ||
| 121 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 122 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); | ||
| 123 | cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size}); | ||
| 124 | }); | ||
| 125 | scheduler.Finish(); | ||
| 126 | 164 | ||
| 127 | std::memcpy(data, staging.mapped_span.data(), data_size); | 165 | // The LUT has the indices 0, 1, 2, and 3 copied as an array |
| 166 | // To apply these 'first' offsets we can apply an offset based on the modulus. | ||
| 167 | const VkIndexType index_type = quad_array_lut_index_type; | ||
| 168 | const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4); | ||
| 169 | const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type); | ||
| 170 | scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) { | ||
| 171 | cmdbuf.BindIndexBuffer(buffer, offset, index_type); | ||
| 172 | }); | ||
| 128 | } | 173 | } |
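To unpack the offset arithmetic above: the LUT stores four copies of the quad pattern, where copy f biases every index by +f, and each quad occupies six indices. For a draw starting at vertex `first`, copy `first % 4` supplies the sub-quad bias and `first / 4` whole quads are skipped within that copy. For example, with current_num_indices = 1024 (256 quads per copy) and first = 5, the bind lands at quad 1 * 256 + 1 = 257, i.e. byte offset 257 * 6 * BytesPerIndex(index_type).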
| 129 | 174 | ||
| 130 | void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | 175 | void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, |
| 131 | std::size_t copy_size) { | 176 | u32 stride) { |
| 132 | scheduler.RequestOutsideRenderPassOperationContext(); | 177 | if (device.IsExtExtendedDynamicStateSupported()) { |
| 178 | scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) { | ||
| 179 | const VkDeviceSize vk_offset = offset; | ||
| 180 | const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE; | ||
| 181 | const VkDeviceSize vk_stride = stride; | ||
| 182 | cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); | ||
| 183 | }); | ||
| 184 | } else { | ||
| 185 | scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) { | ||
| 186 | cmdbuf.BindVertexBuffer(index, buffer, offset); | ||
| 187 | }); | ||
| 188 | } | ||
| 189 | } | ||
| 133 | 190 | ||
| 134 | const VkBuffer dst_buffer = Handle(); | 191 | void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, |
| 135 | scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, | 192 | u32 size) { |
| 136 | copy_size](vk::CommandBuffer cmdbuf) { | 193 | if (!device.IsExtTransformFeedbackSupported()) { |
| 137 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size}); | 194 | // Already logged in the rasterizer |
| 138 | 195 | return; | |
| 139 | std::array<VkBufferMemoryBarrier, 2> barriers; | 196 | } |
| 140 | barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | 197 | scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) { |
| 141 | barriers[0].pNext = nullptr; | 198 | const VkDeviceSize vk_offset = offset; |
| 142 | barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; | 199 | const VkDeviceSize vk_size = size; |
| 143 | barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | 200 | cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size); |
| 144 | barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 145 | barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 146 | barriers[0].buffer = src_buffer; | ||
| 147 | barriers[0].offset = src_offset; | ||
| 148 | barriers[0].size = copy_size; | ||
| 149 | barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||
| 150 | barriers[1].pNext = nullptr; | ||
| 151 | barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | ||
| 152 | barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS; | ||
| 153 | barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 154 | barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 155 | barriers[1].buffer = dst_buffer; | ||
| 156 | barriers[1].offset = dst_offset; | ||
| 157 | barriers[1].size = copy_size; | ||
| 158 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, | ||
| 159 | barriers, {}); | ||
| 160 | }); | 201 | }); |
| 161 | } | 202 | } |
| 162 | 203 | ||
| 163 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, | 204 | void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) { |
| 164 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 205 | update_descriptor_queue.AddBuffer(buffer, offset, size); |
| 165 | const Device& device_, MemoryAllocator& memory_allocator_, | 206 | } |
| 166 | VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_, | ||
| 167 | StagingBufferPool& staging_pool_) | ||
| 168 | : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_, | ||
| 169 | cpu_memory_, stream_buffer_}, | ||
| 170 | device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | ||
| 171 | staging_pool{staging_pool_} {} | ||
| 172 | 207 | ||
| 173 | VKBufferCache::~VKBufferCache() = default; | 208 | void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) { |
| 209 | if (num_indices <= current_num_indices) { | ||
| 210 | return; | ||
| 211 | } | ||
| 212 | if (wait_for_idle) { | ||
| 213 | scheduler.Finish(); | ||
| 214 | } | ||
| 215 | current_num_indices = num_indices; | ||
| 216 | quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices); | ||
| 174 | 217 | ||
| 175 | std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | 218 | const u32 num_quads = num_indices / 4; |
| 176 | return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr, | 219 | const u32 num_triangle_indices = num_quads * 6; |
| 177 | size); | 220 | const u32 num_first_offset_copies = 4; |
| 221 | const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type); | ||
| 222 | const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; | ||
| 223 | quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 224 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 225 | .pNext = nullptr, | ||
| 226 | .flags = 0, | ||
| 227 | .size = size_bytes, | ||
| 228 | .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||
| 229 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 230 | .queueFamilyIndexCount = 0, | ||
| 231 | .pQueueFamilyIndices = nullptr, | ||
| 232 | }); | ||
| 233 | if (device.HasDebuggingToolAttached()) { | ||
| 234 | quad_array_lut.SetObjectNameEXT("Quad LUT"); | ||
| 235 | } | ||
| 236 | quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal); | ||
| 237 | |||
| 238 | const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload); | ||
| 239 | u8* staging_data = staging.mapped_span.data(); | ||
| 240 | const size_t quad_size = bytes_per_index * 6; | ||
| 241 | for (u32 first = 0; first < num_first_offset_copies; ++first) { | ||
| 242 | for (u32 quad = 0; quad < num_quads; ++quad) { | ||
| 243 | switch (quad_array_lut_index_type) { | ||
| 244 | case VK_INDEX_TYPE_UINT8_EXT: | ||
| 245 | std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size); | ||
| 246 | break; | ||
| 247 | case VK_INDEX_TYPE_UINT16: | ||
| 248 | std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size); | ||
| 249 | break; | ||
| 250 | case VK_INDEX_TYPE_UINT32: | ||
| 251 | std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size); | ||
| 252 | break; | ||
| 253 | default: | ||
| 254 | UNREACHABLE(); | ||
| 255 | break; | ||
| 256 | } | ||
| 257 | staging_data += quad_size; | ||
| 258 | } | ||
| 259 | } | ||
| 260 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 261 | scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, | ||
| 262 | dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) { | ||
| 263 | const VkBufferCopy copy{ | ||
| 264 | .srcOffset = src_offset, | ||
| 265 | .dstOffset = 0, | ||
| 266 | .size = size_bytes, | ||
| 267 | }; | ||
| 268 | const VkBufferMemoryBarrier write_barrier{ | ||
| 269 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | ||
| 270 | .pNext = nullptr, | ||
| 271 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 272 | .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, | ||
| 273 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 274 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 275 | .buffer = dst_buffer, | ||
| 276 | .offset = 0, | ||
| 277 | .size = size_bytes, | ||
| 278 | }; | ||
| 279 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); | ||
| 280 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, | ||
| 281 | 0, write_barrier); | ||
| 282 | }); | ||
| 178 | } | 283 | } |
| 179 | 284 | ||
| 180 | VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { | 285 | void BufferCacheRuntime::ReserveNullIndexBuffer() { |
| 181 | size = std::max(size, std::size_t(4)); | 286 | if (null_index_buffer) { |
| 182 | const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal); | 287 | return; |
| 288 | } | ||
| 289 | null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 290 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 291 | .pNext = nullptr, | ||
| 292 | .flags = 0, | ||
| 293 | .size = 4, | ||
| 294 | .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||
| 295 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 296 | .queueFamilyIndexCount = 0, | ||
| 297 | .pQueueFamilyIndices = nullptr, | ||
| 298 | }); | ||
| 299 | if (device.HasDebuggingToolAttached()) { | ||
| 300 | null_index_buffer.SetObjectNameEXT("Null index buffer"); | ||
| 301 | } | ||
| 302 | null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal); | ||
| 303 | |||
| 183 | scheduler.RequestOutsideRenderPassOperationContext(); | 304 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 184 | scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) { | 305 | scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) { |
| 185 | cmdbuf.FillBuffer(buffer, 0, size, 0); | 306 | cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0); |
| 186 | }); | 307 | }); |
| 187 | return {empty.buffer, 0, 0}; | ||
| 188 | } | 308 | } |
| 189 | 309 | ||
| 190 | } // namespace Vulkan | 310 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 41d577510..041e6515c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
| @@ -4,69 +4,124 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | 7 | #include "video_core/buffer_cache/buffer_cache.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 12 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||
| 13 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 11 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 12 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 15 | 13 | ||
| 16 | namespace Vulkan { | 14 | namespace Vulkan { |
| 17 | 15 | ||
| 18 | class Device; | 16 | class Device; |
| 17 | class VKDescriptorPool; | ||
| 19 | class VKScheduler; | 18 | class VKScheduler; |
| 19 | class VKUpdateDescriptorQueue; | ||
| 20 | 20 | ||
| 21 | class Buffer final : public VideoCommon::BufferBlock { | 21 | class BufferCacheRuntime; |
| 22 | public: | ||
| 23 | explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler, | ||
| 24 | StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_); | ||
| 25 | ~Buffer(); | ||
| 26 | |||
| 27 | void Upload(std::size_t offset, std::size_t data_size, const u8* data); | ||
| 28 | 22 | ||
| 29 | void Download(std::size_t offset, std::size_t data_size, u8* data); | 23 | class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { |
| 30 | 24 | public: | |
| 31 | void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | 25 | explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params); |
| 32 | std::size_t copy_size); | 26 | explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, |
| 27 | VAddr cpu_addr_, u64 size_bytes_); | ||
| 33 | 28 | ||
| 34 | VkBuffer Handle() const { | 29 | [[nodiscard]] VkBuffer Handle() const noexcept { |
| 35 | return *buffer; | 30 | return *buffer; |
| 36 | } | 31 | } |
| 37 | 32 | ||
| 38 | u64 Address() const { | 33 | operator VkBuffer() const noexcept { |
| 39 | return 0; | 34 | return *buffer; |
| 40 | } | 35 | } |
| 41 | 36 | ||
| 42 | private: | 37 | private: |
| 43 | const Device& device; | ||
| 44 | VKScheduler& scheduler; | ||
| 45 | StagingBufferPool& staging_pool; | ||
| 46 | |||
| 47 | vk::Buffer buffer; | 38 | vk::Buffer buffer; |
| 48 | MemoryCommit commit; | 39 | MemoryCommit commit; |
| 49 | }; | 40 | }; |
| 50 | 41 | ||
| 51 | class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { | 42 | class BufferCacheRuntime { |
| 43 | friend Buffer; | ||
| 44 | |||
| 45 | using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology; | ||
| 46 | using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat; | ||
| 47 | |||
| 52 | public: | 48 | public: |
| 53 | explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, | 49 | explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, |
| 54 | Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, | 50 | VKScheduler& scheduler_, StagingBufferPool& staging_pool_, |
| 55 | const Device& device, MemoryAllocator& memory_allocator, | 51 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 56 | VKScheduler& scheduler, VKStreamBuffer& stream_buffer, | 52 | VKDescriptorPool& descriptor_pool); |
| 57 | StagingBufferPool& staging_pool); | 53 | |
| 58 | ~VKBufferCache(); | 54 | void Finish(); |
| 55 | |||
| 56 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); | ||
| 57 | |||
| 58 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); | ||
| 59 | |||
| 60 | void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, | ||
| 61 | std::span<const VideoCommon::BufferCopy> copies); | ||
| 62 | |||
| 63 | void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices, | ||
| 64 | u32 base_vertex, VkBuffer buffer, u32 offset, u32 size); | ||
| 59 | 65 | ||
| 60 | BufferInfo GetEmptyBuffer(std::size_t size) override; | 66 | void BindQuadArrayIndexBuffer(u32 first, u32 count); |
| 61 | 67 | ||
| 62 | protected: | 68 | void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride); |
| 63 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; | 69 | |
| 70 | void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); | ||
| 71 | |||
| 72 | std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage, | ||
| 73 | [[maybe_unused]] u32 binding_index, u32 size) { | ||
| 74 | const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload); | ||
| 75 | BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size); | ||
| 76 | return ref.mapped_span; | ||
| 77 | } | ||
| 78 | |||
| 79 | void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) { | ||
| 80 | BindBuffer(buffer, offset, size); | ||
| 81 | } | ||
| 82 | |||
| 83 | void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size, | ||
| 84 | [[maybe_unused]] bool is_written) { | ||
| 85 | BindBuffer(buffer, offset, size); | ||
| 86 | } | ||
| 64 | 87 | ||
| 65 | private: | 88 | private: |
| 89 | void BindBuffer(VkBuffer buffer, u32 offset, u32 size); | ||
| 90 | |||
| 91 | void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle); | ||
| 92 | |||
| 93 | void ReserveNullIndexBuffer(); | ||
| 94 | |||
| 66 | const Device& device; | 95 | const Device& device; |
| 67 | MemoryAllocator& memory_allocator; | 96 | MemoryAllocator& memory_allocator; |
| 68 | VKScheduler& scheduler; | 97 | VKScheduler& scheduler; |
| 69 | StagingBufferPool& staging_pool; | 98 | StagingBufferPool& staging_pool; |
| 99 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 100 | |||
| 101 | vk::Buffer quad_array_lut; | ||
| 102 | MemoryCommit quad_array_lut_commit; | ||
| 103 | VkIndexType quad_array_lut_index_type{}; | ||
| 104 | u32 current_num_indices = 0; | ||
| 105 | |||
| 106 | vk::Buffer null_index_buffer; | ||
| 107 | MemoryCommit null_index_buffer_commit; | ||
| 108 | |||
| 109 | Uint8Pass uint8_pass; | ||
| 110 | QuadIndexedPass quad_index_pass; | ||
| 70 | }; | 111 | }; |
| 71 | 112 | ||
| 113 | struct BufferCacheParams { | ||
| 114 | using Runtime = Vulkan::BufferCacheRuntime; | ||
| 115 | using Buffer = Vulkan::Buffer; | ||
| 116 | |||
| 117 | static constexpr bool IS_OPENGL = false; | ||
| 118 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; | ||
| 119 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; | ||
| 120 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; | ||
| 121 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; | ||
| 122 | static constexpr bool USE_MEMORY_MAPS = true; | ||
| 123 | }; | ||
| 124 | |||
| 125 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | ||
| 126 | |||
| 72 | } // namespace Vulkan | 127 | } // namespace Vulkan |
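The BufferCacheParams block above wires the Vulkan backend into the shared VideoCommon::BufferCache template, so backend differences (memory maps vs. direct uploads, index/primitive support, binding indices) are resolved at compile time. A minimal standalone sketch of how traits like USE_MEMORY_MAPS are typically consumed; names are illustrative, not the actual video_core template:

```cpp
// Hypothetical sketch of a cache template consuming compile-time traits such
// as USE_MEMORY_MAPS. Names are illustrative, not yuzu's real interfaces.
#include <cstddef>
#include <cstdio>

struct ExampleParams {
    // Vulkan-style backend: uploads go through a mapped staging buffer.
    static constexpr bool USE_MEMORY_MAPS = true;
};

template <class P>
class ExampleBufferCache {
public:
    void Upload(std::size_t size) {
        if constexpr (P::USE_MEMORY_MAPS) {
            // Request a mapped staging buffer and record a GPU copy.
            std::printf("map + copy of %zu bytes\n", size);
        } else {
            // Immediate glBufferSubData-style upload (OpenGL-style backend).
            std::printf("direct upload of %zu bytes\n", size);
        }
    }
};

int main() {
    ExampleBufferCache<ExampleParams> cache;
    cache.Upload(64);
}
```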
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 5eb6a54be..2f9a7b028 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -10,7 +10,7 @@ | |||
| 10 | #include "common/alignment.h" | 10 | #include "common/alignment.h" |
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/host_shaders/vulkan_quad_array_comp_spv.h" | 13 | #include "common/div_ceil.h" |
| 14 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | 14 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" |
| 15 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | 15 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" |
| 16 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 16 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| @@ -22,30 +22,7 @@ | |||
| 22 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 22 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 23 | 23 | ||
| 24 | namespace Vulkan { | 24 | namespace Vulkan { |
| 25 | |||
| 26 | namespace { | 25 | namespace { |
| 27 | |||
| 28 | VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { | ||
| 29 | return { | ||
| 30 | .binding = 0, | ||
| 31 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 32 | .descriptorCount = 1, | ||
| 33 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 34 | .pImmutableSamplers = nullptr, | ||
| 35 | }; | ||
| 36 | } | ||
| 37 | |||
| 38 | VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() { | ||
| 39 | return { | ||
| 40 | .dstBinding = 0, | ||
| 41 | .dstArrayElement = 0, | ||
| 42 | .descriptorCount = 1, | ||
| 43 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 44 | .offset = 0, | ||
| 45 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 46 | }; | ||
| 47 | } | ||
| 48 | |||
| 49 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | 26 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { |
| 50 | return { | 27 | return { |
| 51 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 28 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| @@ -162,55 +139,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( | |||
| 162 | return set; | 139 | return set; |
| 163 | } | 140 | } |
| 164 | 141 | ||
| 165 | QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_, | ||
| 166 | VKDescriptorPool& descriptor_pool_, | ||
| 167 | StagingBufferPool& staging_buffer_pool_, | ||
| 168 | VKUpdateDescriptorQueue& update_descriptor_queue_) | ||
| 169 | : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), | ||
| 170 | BuildQuadArrayPassDescriptorUpdateTemplateEntry(), | ||
| 171 | BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV), | ||
| 172 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | ||
| 173 | update_descriptor_queue{update_descriptor_queue_} {} | ||
| 174 | |||
| 175 | QuadArrayPass::~QuadArrayPass() = default; | ||
| 176 | |||
| 177 | std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { | ||
| 178 | const u32 num_triangle_vertices = (num_vertices / 4) * 6; | ||
| 179 | const std::size_t staging_size = num_triangle_vertices * sizeof(u32); | ||
| 180 | const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | ||
| 181 | |||
| 182 | update_descriptor_queue.Acquire(); | ||
| 183 | update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); | ||
| 184 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | ||
| 185 | |||
| 186 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 187 | |||
| 188 | ASSERT(num_vertices % 4 == 0); | ||
| 189 | const u32 num_quads = num_vertices / 4; | ||
| 190 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, | ||
| 191 | num_quads, first, set](vk::CommandBuffer cmdbuf) { | ||
| 192 | constexpr u32 dispatch_size = 1024; | ||
| 193 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | ||
| 194 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | ||
| 195 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first); | ||
| 196 | cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1); | ||
| 197 | |||
| 198 | VkBufferMemoryBarrier barrier; | ||
| 199 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||
| 200 | barrier.pNext = nullptr; | ||
| 201 | barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||
| 202 | barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; | ||
| 203 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 204 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 205 | barrier.buffer = buffer; | ||
| 206 | barrier.offset = 0; | ||
| 207 | barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32); | ||
| 208 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 209 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {}); | ||
| 210 | }); | ||
| 211 | return {staging_ref.buffer, 0}; | ||
| 212 | } | ||
| 213 | |||
| 214 | Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, | 142 | Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, |
| 215 | VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, | 143 | VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, |
| 216 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 144 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| @@ -221,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, | |||
| 221 | 149 | ||
| 222 | Uint8Pass::~Uint8Pass() = default; | 150 | Uint8Pass::~Uint8Pass() = default; |
| 223 | 151 | ||
| 224 | std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, | 152 | std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, |
| 225 | u64 src_offset) { | 153 | u32 src_offset) { |
| 226 | const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); | 154 | const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); |
| 227 | const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | 155 | const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); |
| 228 | 156 | ||
| 229 | update_descriptor_queue.Acquire(); | 157 | update_descriptor_queue.Acquire(); |
| 230 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); | 158 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); |
| 231 | update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); | 159 | update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); |
| 232 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 160 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); |
| 233 | 161 | ||
| 234 | scheduler.RequestOutsideRenderPassOperationContext(); | 162 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 235 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, | 163 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, |
| 236 | num_vertices](vk::CommandBuffer cmdbuf) { | 164 | num_vertices](vk::CommandBuffer cmdbuf) { |
| 237 | constexpr u32 dispatch_size = 1024; | 165 | static constexpr u32 DISPATCH_SIZE = 1024; |
| 166 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 167 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 168 | .pNext = nullptr, | ||
| 169 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 170 | .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, | ||
| 171 | }; | ||
| 238 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | 172 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| 239 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | 173 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); |
| 240 | cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1); | 174 | cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); |
| 241 | |||
| 242 | VkBufferMemoryBarrier barrier; | ||
| 243 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||
| 244 | barrier.pNext = nullptr; | ||
| 245 | barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||
| 246 | barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; | ||
| 247 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 248 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 249 | barrier.buffer = buffer; | ||
| 250 | barrier.offset = 0; | ||
| 251 | barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16)); | ||
| 252 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 175 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 253 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); | 176 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); |
| 254 | }); | 177 | }); |
| 255 | return {staging_ref.buffer, 0}; | 178 | return {staging.buffer, staging.offset}; |
| 256 | } | 179 | } |
| 257 | 180 | ||
| 258 | QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, | 181 | QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, |
| @@ -267,9 +190,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, | |||
| 267 | 190 | ||
| 268 | QuadIndexedPass::~QuadIndexedPass() = default; | 191 | QuadIndexedPass::~QuadIndexedPass() = default; |
| 269 | 192 | ||
| 270 | std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( | 193 | std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( |
| 271 | Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, | 194 | Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, |
| 272 | VkBuffer src_buffer, u64 src_offset) { | 195 | VkBuffer src_buffer, u32 src_offset) { |
| 273 | const u32 index_shift = [index_format] { | 196 | const u32 index_shift = [index_format] { |
| 274 | switch (index_format) { | 197 | switch (index_format) { |
| 275 | case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: | 198 | case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: |
| @@ -286,38 +209,33 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( | |||
| 286 | const u32 num_tri_vertices = (num_vertices / 4) * 6; | 209 | const u32 num_tri_vertices = (num_vertices / 4) * 6; |
| 287 | 210 | ||
| 288 | const std::size_t staging_size = num_tri_vertices * sizeof(u32); | 211 | const std::size_t staging_size = num_tri_vertices * sizeof(u32); |
| 289 | const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | 212 | const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); |
| 290 | 213 | ||
| 291 | update_descriptor_queue.Acquire(); | 214 | update_descriptor_queue.Acquire(); |
| 292 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); | 215 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); |
| 293 | update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); | 216 | update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); |
| 294 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 217 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); |
| 295 | 218 | ||
| 296 | scheduler.RequestOutsideRenderPassOperationContext(); | 219 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 297 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, | 220 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, |
| 298 | num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { | 221 | num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { |
| 299 | static constexpr u32 dispatch_size = 1024; | 222 | static constexpr u32 DISPATCH_SIZE = 1024; |
| 223 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 224 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 225 | .pNext = nullptr, | ||
| 226 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 227 | .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, | ||
| 228 | }; | ||
| 300 | const std::array push_constants = {base_vertex, index_shift}; | 229 | const std::array push_constants = {base_vertex, index_shift}; |
| 301 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | 230 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| 302 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | 231 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); |
| 303 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), | 232 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), |
| 304 | &push_constants); | 233 | &push_constants); |
| 305 | cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); | 234 | cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); |
| 306 | |||
| 307 | VkBufferMemoryBarrier barrier; | ||
| 308 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||
| 309 | barrier.pNext = nullptr; | ||
| 310 | barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||
| 311 | barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; | ||
| 312 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 313 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 314 | barrier.buffer = buffer; | ||
| 315 | barrier.offset = 0; | ||
| 316 | barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32)); | ||
| 317 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 235 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 318 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); | 236 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); |
| 319 | }); | 237 | }); |
| 320 | return {staging_ref.buffer, 0}; | 238 | return {staging.buffer, staging.offset}; |
| 321 | } | 239 | } |
| 322 | 240 | ||
| 323 | } // namespace Vulkan | 241 | } // namespace Vulkan |
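Both compute passes now return the staging offset instead of assuming zero, and quad expansion moves out of the deleted QuadArrayPass into a lookup table owned by the buffer cache runtime (ReserveQuadArrayLUT in the header above). A standalone CPU-side sketch of the quad-to-triangle index expansion such a table encodes; the exact vertex ordering used by the emulator may differ:

```cpp
// CPU-side model of quad expansion: each quad (v0 v1 v2 v3) becomes two
// triangles (v0 v1 v2) and (v0 v2 v3), i.e. 6 indices per 4 vertices.
// The ordering here is illustrative; the emulator's table may differ.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::uint32_t> MakeQuadIndices(std::uint32_t num_quads) {
    std::vector<std::uint32_t> indices;
    indices.reserve(static_cast<std::size_t>(num_quads) * 6);
    for (std::uint32_t quad = 0; quad < num_quads; ++quad) {
        const std::uint32_t base = quad * 4;
        for (const std::uint32_t offset : {0u, 1u, 2u, 0u, 2u, 3u}) {
            indices.push_back(base + offset);
        }
    }
    return indices;
}

int main() {
    // Two quads expand to 12 indices: 0 1 2 0 2 3, 4 5 6 4 6 7.
    return MakeQuadIndices(2).size() == 12 ? 0 : 1;
}
```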
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index f5c6f5f17..17d781d99 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -41,22 +41,6 @@ private: | |||
| 41 | vk::ShaderModule module; | 41 | vk::ShaderModule module; |
| 42 | }; | 42 | }; |
| 43 | 43 | ||
| 44 | class QuadArrayPass final : public VKComputePass { | ||
| 45 | public: | ||
| 46 | explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_, | ||
| 47 | VKDescriptorPool& descriptor_pool_, | ||
| 48 | StagingBufferPool& staging_buffer_pool_, | ||
| 49 | VKUpdateDescriptorQueue& update_descriptor_queue_); | ||
| 50 | ~QuadArrayPass(); | ||
| 51 | |||
| 52 | std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first); | ||
| 53 | |||
| 54 | private: | ||
| 55 | VKScheduler& scheduler; | ||
| 56 | StagingBufferPool& staging_buffer_pool; | ||
| 57 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 58 | }; | ||
| 59 | |||
| 60 | class Uint8Pass final : public VKComputePass { | 44 | class Uint8Pass final : public VKComputePass { |
| 61 | public: | 45 | public: |
| 62 | explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, | 46 | explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, |
| @@ -64,7 +48,10 @@ public: | |||
| 64 | VKUpdateDescriptorQueue& update_descriptor_queue_); | 48 | VKUpdateDescriptorQueue& update_descriptor_queue_); |
| 65 | ~Uint8Pass(); | 49 | ~Uint8Pass(); |
| 66 | 50 | ||
| 67 | std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); | 51 | /// Assemble uint8 indices into a uint16 index buffer |
| 52 | /// Returns a pair of the staging buffer and the offset where the assembled data begins |||
| 53 | std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer, | ||
| 54 | u32 src_offset); | ||
| 68 | 55 | ||
| 69 | private: | 56 | private: |
| 70 | VKScheduler& scheduler; | 57 | VKScheduler& scheduler; |
| @@ -80,9 +67,9 @@ public: | |||
| 80 | VKUpdateDescriptorQueue& update_descriptor_queue_); | 67 | VKUpdateDescriptorQueue& update_descriptor_queue_); |
| 81 | ~QuadIndexedPass(); | 68 | ~QuadIndexedPass(); |
| 82 | 69 | ||
| 83 | std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, | 70 | std::pair<VkBuffer, VkDeviceSize> Assemble( |
| 84 | u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, | 71 | Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, |
| 85 | u64 src_offset); | 72 | u32 base_vertex, VkBuffer src_buffer, u32 src_offset); |
| 86 | 73 | ||
| 87 | private: | 74 | private: |
| 88 | VKScheduler& scheduler; | 75 | VKScheduler& scheduler; |
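With the new signatures, callers receive a (buffer, offset) pair and bind the staging buffer at that offset. A non-standalone fragment sketching that consumption inside the rasterizer, reusing names that appear elsewhere in this patch (uint8_pass, scheduler, vk::CommandBuffer::BindIndexBuffer); it is illustrative, not a quote of the actual call site:

```cpp
// Hypothetical caller inside the rasterizer (not a quote of the actual call
// site): convert uint8 guest indices to uint16, then bind the staging buffer
// as the index buffer at the returned offset.
const auto [index_buffer, index_offset] = uint8_pass.Assemble(num_indices, src_buffer, src_offset);
scheduler.Record([index_buffer, index_offset](vk::CommandBuffer cmdbuf) {
    cmdbuf.BindIndexBuffer(index_buffer, index_offset, VK_INDEX_TYPE_UINT16);
});
```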
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 6cd00884d..3bec48d14 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp | |||
| @@ -45,8 +45,8 @@ void InnerFence::Wait() { | |||
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 47 | VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, |
| 48 | Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, | 48 | TextureCache& texture_cache_, BufferCache& buffer_cache_, |
| 49 | VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, | 49 | VKQueryCache& query_cache_, const Device& device_, |
| 50 | VKScheduler& scheduler_) | 50 | VKScheduler& scheduler_) |
| 51 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, | 51 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, |
| 52 | scheduler{scheduler_} {} | 52 | scheduler{scheduler_} {} |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 9c5e5aa8f..2f8322d29 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -22,7 +22,6 @@ class RasterizerInterface; | |||
| 22 | namespace Vulkan { | 22 | namespace Vulkan { |
| 23 | 23 | ||
| 24 | class Device; | 24 | class Device; |
| 25 | class VKBufferCache; | ||
| 26 | class VKQueryCache; | 25 | class VKQueryCache; |
| 27 | class VKScheduler; | 26 | class VKScheduler; |
| 28 | 27 | ||
| @@ -45,14 +44,14 @@ private: | |||
| 45 | using Fence = std::shared_ptr<InnerFence>; | 44 | using Fence = std::shared_ptr<InnerFence>; |
| 46 | 45 | ||
| 47 | using GenericFenceManager = | 46 | using GenericFenceManager = |
| 48 | VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>; | 47 | VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>; |
| 49 | 48 | ||
| 50 | class VKFenceManager final : public GenericFenceManager { | 49 | class VKFenceManager final : public GenericFenceManager { |
| 51 | public: | 50 | public: |
| 52 | explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 51 | explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, |
| 53 | Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, | 52 | TextureCache& texture_cache, BufferCache& buffer_cache, |
| 54 | VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, | 53 | VKQueryCache& query_cache, const Device& device, |
| 55 | VKScheduler& scheduler_); | 54 | VKScheduler& scheduler); |
| 56 | 55 | ||
| 57 | protected: | 56 | protected: |
| 58 | Fence CreateFence(u32 value, bool is_stubbed) override; | 57 | Fence CreateFence(u32 value, bool is_stubbed) override; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f0a111829..684d4e3a6 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -8,8 +8,6 @@ | |||
| 8 | #include <mutex> | 8 | #include <mutex> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include <boost/container/static_vector.hpp> | ||
| 12 | |||
| 13 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| 14 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 15 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| @@ -24,7 +22,6 @@ | |||
| 24 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 22 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 25 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 23 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 26 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 27 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 25 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 29 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 26 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 30 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 27 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| @@ -50,15 +47,16 @@ MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(25 | |||
| 50 | MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); | 47 | MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); |
| 51 | MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); | 48 | MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); |
| 52 | MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); | 49 | MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); |
| 53 | MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128)); | ||
| 54 | MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128)); | ||
| 55 | MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128)); | ||
| 56 | MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128)); | ||
| 57 | MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128)); | ||
| 58 | MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128)); | ||
| 59 | MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); | 50 | MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); |
| 60 | 51 | ||
| 61 | namespace { | 52 | namespace { |
| 53 | struct DrawParams { | ||
| 54 | u32 base_instance; | ||
| 55 | u32 num_instances; | ||
| 56 | u32 base_vertex; | ||
| 57 | u32 num_vertices; | ||
| 58 | bool is_indexed; | ||
| 59 | }; | ||
| 62 | 60 | ||
| 63 | constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); | 61 | constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); |
| 64 | 62 | ||
| @@ -67,7 +65,6 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in | |||
| 67 | const float width = src.scale_x * 2.0f; | 65 | const float width = src.scale_x * 2.0f; |
| 68 | const float height = src.scale_y * 2.0f; | 66 | const float height = src.scale_y * 2.0f; |
| 69 | const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; | 67 | const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; |
| 70 | |||
| 71 | VkViewport viewport{ | 68 | VkViewport viewport{ |
| 72 | .x = src.translate_x - src.scale_x, | 69 | .x = src.translate_x - src.scale_x, |
| 73 | .y = src.translate_y - src.scale_y, | 70 | .y = src.translate_y - src.scale_y, |
| @@ -76,12 +73,10 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in | |||
| 76 | .minDepth = src.translate_z - src.scale_z * reduce_z, | 73 | .minDepth = src.translate_z - src.scale_z * reduce_z, |
| 77 | .maxDepth = src.translate_z + src.scale_z, | 74 | .maxDepth = src.translate_z + src.scale_z, |
| 78 | }; | 75 | }; |
| 79 | |||
| 80 | if (!device.IsExtDepthRangeUnrestrictedSupported()) { | 76 | if (!device.IsExtDepthRangeUnrestrictedSupported()) { |
| 81 | viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); | 77 | viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); |
| 82 | viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); | 78 | viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); |
| 83 | } | 79 | } |
| 84 | |||
| 85 | return viewport; | 80 | return viewport; |
| 86 | } | 81 | } |
| 87 | 82 | ||
| @@ -146,13 +141,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const | |||
| 146 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | 141 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); |
| 147 | } | 142 | } |
| 148 | 143 | ||
| 149 | template <size_t N> | ||
| 150 | std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { | ||
| 151 | std::array<VkDeviceSize, N> expanded; | ||
| 152 | std::copy(strides.begin(), strides.end(), expanded.begin()); | ||
| 153 | return expanded; | ||
| 154 | } | ||
| 155 | |||
| 156 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | 144 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { |
| 157 | if (entry.is_buffer) { | 145 | if (entry.is_buffer) { |
| 158 | return ImageViewType::e2D; | 146 | return ImageViewType::e2D; |
| @@ -221,190 +209,25 @@ void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_ca | |||
| 221 | } | 209 | } |
| 222 | } | 210 | } |
| 223 | 211 | ||
| 224 | } // Anonymous namespace | 212 | DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, |
| 225 | 213 | bool is_indexed) { | |
| 226 | class BufferBindings final { | 214 | DrawParams params{ |
| 227 | public: | 215 | .base_instance = regs.vb_base_instance, |
| 228 | void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) { | 216 | .num_instances = is_instanced ? num_instances : 1, |
| 229 | vertex.buffers[vertex.num_buffers] = buffer; | 217 | .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, |
| 230 | vertex.offsets[vertex.num_buffers] = offset; | 218 | .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, |
| 231 | vertex.sizes[vertex.num_buffers] = size; | 219 | .is_indexed = is_indexed, |
| 232 | vertex.strides[vertex.num_buffers] = static_cast<u16>(stride); | 220 | }; |
| 233 | ++vertex.num_buffers; | 221 | if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { |
| 234 | } | 222 | // 6 triangle vertices per quad, base vertex is part of the index |
| 235 | 223 | // See BindQuadArrayIndexBuffer for more details | |
| 236 | void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) { | 224 | params.num_vertices = (params.num_vertices / 4) * 6; |
| 237 | index.buffer = buffer; | 225 | params.base_vertex = 0; |
| 238 | index.offset = offset; | 226 | params.is_indexed = true; |
| 239 | index.type = type; | ||
| 240 | } | ||
| 241 | |||
| 242 | void Bind(const Device& device, VKScheduler& scheduler) const { | ||
| 243 | // Use this large switch case to avoid dispatching more memory in the record lambda than | ||
| 244 | // what we need. It looks horrible, but it's the best we can do on standard C++. | ||
| 245 | switch (vertex.num_buffers) { | ||
| 246 | case 0: | ||
| 247 | return BindStatic<0>(device, scheduler); | ||
| 248 | case 1: | ||
| 249 | return BindStatic<1>(device, scheduler); | ||
| 250 | case 2: | ||
| 251 | return BindStatic<2>(device, scheduler); | ||
| 252 | case 3: | ||
| 253 | return BindStatic<3>(device, scheduler); | ||
| 254 | case 4: | ||
| 255 | return BindStatic<4>(device, scheduler); | ||
| 256 | case 5: | ||
| 257 | return BindStatic<5>(device, scheduler); | ||
| 258 | case 6: | ||
| 259 | return BindStatic<6>(device, scheduler); | ||
| 260 | case 7: | ||
| 261 | return BindStatic<7>(device, scheduler); | ||
| 262 | case 8: | ||
| 263 | return BindStatic<8>(device, scheduler); | ||
| 264 | case 9: | ||
| 265 | return BindStatic<9>(device, scheduler); | ||
| 266 | case 10: | ||
| 267 | return BindStatic<10>(device, scheduler); | ||
| 268 | case 11: | ||
| 269 | return BindStatic<11>(device, scheduler); | ||
| 270 | case 12: | ||
| 271 | return BindStatic<12>(device, scheduler); | ||
| 272 | case 13: | ||
| 273 | return BindStatic<13>(device, scheduler); | ||
| 274 | case 14: | ||
| 275 | return BindStatic<14>(device, scheduler); | ||
| 276 | case 15: | ||
| 277 | return BindStatic<15>(device, scheduler); | ||
| 278 | case 16: | ||
| 279 | return BindStatic<16>(device, scheduler); | ||
| 280 | case 17: | ||
| 281 | return BindStatic<17>(device, scheduler); | ||
| 282 | case 18: | ||
| 283 | return BindStatic<18>(device, scheduler); | ||
| 284 | case 19: | ||
| 285 | return BindStatic<19>(device, scheduler); | ||
| 286 | case 20: | ||
| 287 | return BindStatic<20>(device, scheduler); | ||
| 288 | case 21: | ||
| 289 | return BindStatic<21>(device, scheduler); | ||
| 290 | case 22: | ||
| 291 | return BindStatic<22>(device, scheduler); | ||
| 292 | case 23: | ||
| 293 | return BindStatic<23>(device, scheduler); | ||
| 294 | case 24: | ||
| 295 | return BindStatic<24>(device, scheduler); | ||
| 296 | case 25: | ||
| 297 | return BindStatic<25>(device, scheduler); | ||
| 298 | case 26: | ||
| 299 | return BindStatic<26>(device, scheduler); | ||
| 300 | case 27: | ||
| 301 | return BindStatic<27>(device, scheduler); | ||
| 302 | case 28: | ||
| 303 | return BindStatic<28>(device, scheduler); | ||
| 304 | case 29: | ||
| 305 | return BindStatic<29>(device, scheduler); | ||
| 306 | case 30: | ||
| 307 | return BindStatic<30>(device, scheduler); | ||
| 308 | case 31: | ||
| 309 | return BindStatic<31>(device, scheduler); | ||
| 310 | case 32: | ||
| 311 | return BindStatic<32>(device, scheduler); | ||
| 312 | } | ||
| 313 | UNREACHABLE(); | ||
| 314 | } | ||
| 315 | |||
| 316 | private: | ||
| 317 | // Some of these fields are intentionally left uninitialized to avoid initializing them twice. | ||
| 318 | struct { | ||
| 319 | size_t num_buffers = 0; | ||
| 320 | std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; | ||
| 321 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; | ||
| 322 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; | ||
| 323 | std::array<u16, Maxwell::NumVertexArrays> strides; | ||
| 324 | } vertex; | ||
| 325 | |||
| 326 | struct { | ||
| 327 | VkBuffer buffer = nullptr; | ||
| 328 | VkDeviceSize offset; | ||
| 329 | VkIndexType type; | ||
| 330 | } index; | ||
| 331 | |||
| 332 | template <size_t N> | ||
| 333 | void BindStatic(const Device& device, VKScheduler& scheduler) const { | ||
| 334 | if (device.IsExtExtendedDynamicStateSupported()) { | ||
| 335 | if (index.buffer) { | ||
| 336 | BindStatic<N, true, true>(scheduler); | ||
| 337 | } else { | ||
| 338 | BindStatic<N, false, true>(scheduler); | ||
| 339 | } | ||
| 340 | } else { | ||
| 341 | if (index.buffer) { | ||
| 342 | BindStatic<N, true, false>(scheduler); | ||
| 343 | } else { | ||
| 344 | BindStatic<N, false, false>(scheduler); | ||
| 345 | } | ||
| 346 | } | ||
| 347 | } | ||
| 348 | |||
| 349 | template <size_t N, bool is_indexed, bool has_extended_dynamic_state> | ||
| 350 | void BindStatic(VKScheduler& scheduler) const { | ||
| 351 | static_assert(N <= Maxwell::NumVertexArrays); | ||
| 352 | if constexpr (N == 0) { | ||
| 353 | return; | ||
| 354 | } | ||
| 355 | |||
| 356 | std::array<VkBuffer, N> buffers; | ||
| 357 | std::array<VkDeviceSize, N> offsets; | ||
| 358 | std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin()); | ||
| 359 | std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); | ||
| 360 | |||
| 361 | if constexpr (has_extended_dynamic_state) { | ||
| 362 | // With extended dynamic states we can specify the length and stride of a vertex buffer | ||
| 363 | std::array<VkDeviceSize, N> sizes; | ||
| 364 | std::array<u16, N> strides; | ||
| 365 | std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin()); | ||
| 366 | std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin()); | ||
| 367 | |||
| 368 | if constexpr (is_indexed) { | ||
| 369 | scheduler.Record( | ||
| 370 | [buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) { | ||
| 371 | cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); | ||
| 372 | cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), | ||
| 373 | offsets.data(), sizes.data(), | ||
| 374 | ExpandStrides(strides).data()); | ||
| 375 | }); | ||
| 376 | } else { | ||
| 377 | scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) { | ||
| 378 | cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), | ||
| 379 | offsets.data(), sizes.data(), | ||
| 380 | ExpandStrides(strides).data()); | ||
| 381 | }); | ||
| 382 | } | ||
| 383 | return; | ||
| 384 | } | ||
| 385 | |||
| 386 | if constexpr (is_indexed) { | ||
| 387 | // Indexed draw | ||
| 388 | scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) { | ||
| 389 | cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); | ||
| 390 | cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); | ||
| 391 | }); | ||
| 392 | } else { | ||
| 393 | // Array draw | ||
| 394 | scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) { | ||
| 395 | cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); | ||
| 396 | }); | ||
| 397 | } | ||
| 398 | } | ||
| 399 | }; | ||
| 400 | |||
| 401 | void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { | ||
| 402 | if (is_indexed) { | ||
| 403 | cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance); | ||
| 404 | } else { | ||
| 405 | cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance); | ||
| 406 | } | 227 | } |
| 228 | return params; | ||
| 407 | } | 229 | } |
| 230 | } // Anonymous namespace | ||
| 408 | 231 | ||
| 409 | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 232 | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 410 | Tegra::MemoryManager& gpu_memory_, | 233 | Tegra::MemoryManager& gpu_memory_, |
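MakeDrawParams above turns quad draws into indexed triangle draws: the vertex count scales by 6/4, and the base vertex is zeroed because the generated quad index buffer already accounts for it. A standalone sketch of just that arithmetic, using a hypothetical helper that mirrors the fields in the diff:

```cpp
// Quad topology: 4 vertices per quad become 6 triangle-list indices, and the
// draw no longer applies a base vertex because the quad index buffer already
// encodes it. Hypothetical helper mirroring the fields in the diff.
#include <cstdint>
#include <cstdio>

struct DrawParams {
    std::uint32_t base_vertex;
    std::uint32_t num_vertices;
    bool is_indexed;
};

DrawParams MakeQuadDrawParams(std::uint32_t quad_vertices,
                              [[maybe_unused]] std::uint32_t base_vertex) {
    return DrawParams{
        .base_vertex = 0,                        // baked into the generated index buffer
        .num_vertices = (quad_vertices / 4) * 6, // 6 indices per quad
        .is_indexed = true,                      // quads always draw through indices
    };
}

int main() {
    const DrawParams params = MakeQuadDrawParams(8, 100); // 2 quads
    std::printf("%u indices, base vertex %u\n", static_cast<unsigned>(params.num_vertices),
                static_cast<unsigned>(params.base_vertex));
}
```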
| @@ -414,21 +237,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 414 | : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, | 237 | : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, |
| 415 | gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, | 238 | gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, |
| 416 | screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, | 239 | screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, |
| 417 | state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), | 240 | state_tracker{state_tracker_}, scheduler{scheduler_}, |
| 418 | staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), | 241 | staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), |
| 419 | update_descriptor_queue(device, scheduler), | 242 | update_descriptor_queue(device, scheduler), |
| 420 | blit_image(device, scheduler, state_tracker, descriptor_pool), | 243 | blit_image(device, scheduler, state_tracker, descriptor_pool), |
| 421 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 422 | quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 423 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 424 | texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, | 244 | texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, |
| 425 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | 245 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), |
| 246 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | ||
| 247 | update_descriptor_queue, descriptor_pool), | ||
| 248 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | ||
| 426 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, | 249 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, |
| 427 | descriptor_pool, update_descriptor_queue), | 250 | descriptor_pool, update_descriptor_queue), |
| 428 | buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler, | ||
| 429 | stream_buffer, staging_pool), | ||
| 430 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, | 251 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, |
| 431 | fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler), | 252 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| 432 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { | 253 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { |
| 433 | scheduler.SetQueryCache(query_cache); | 254 | scheduler.SetQueryCache(query_cache); |
| 434 | if (device.UseAsynchronousShaders()) { | 255 | if (device.UseAsynchronousShaders()) { |
| @@ -449,22 +270,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 449 | GraphicsPipelineCacheKey key; | 270 | GraphicsPipelineCacheKey key; |
| 450 | key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); | 271 | key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); |
| 451 | 272 | ||
| 452 | buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); | 273 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 453 | |||
| 454 | BufferBindings buffer_bindings; | ||
| 455 | const DrawParameters draw_params = | ||
| 456 | SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); | ||
| 457 | 274 | ||
| 458 | auto lock = texture_cache.AcquireLock(); | ||
| 459 | texture_cache.SynchronizeGraphicsDescriptors(); | 275 | texture_cache.SynchronizeGraphicsDescriptors(); |
| 460 | |||
| 461 | texture_cache.UpdateRenderTargets(false); | 276 | texture_cache.UpdateRenderTargets(false); |
| 462 | 277 | ||
| 463 | const auto shaders = pipeline_cache.GetShaders(); | 278 | const auto shaders = pipeline_cache.GetShaders(); |
| 464 | key.shaders = GetShaderAddresses(shaders); | 279 | key.shaders = GetShaderAddresses(shaders); |
| 465 | SetupShaderDescriptors(shaders); | 280 | SetupShaderDescriptors(shaders, is_indexed); |
| 466 | |||
| 467 | buffer_cache.Unmap(); | ||
| 468 | 281 | ||
| 469 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); | 282 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); |
| 470 | key.renderpass = framebuffer->RenderPass(); | 283 | key.renderpass = framebuffer->RenderPass(); |
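Draw now holds both cache mutexes through a single std::scoped_lock instead of the old texture_cache.AcquireLock() helper; scoped_lock acquires multiple mutexes with deadlock avoidance (as if by std::lock) and releases them at scope exit. A standalone sketch of that pattern:

```cpp
// std::scoped_lock over two mutexes: both are acquired with deadlock
// avoidance (as if by std::lock) and released when the scope ends.
#include <cstdio>
#include <mutex>
#include <thread>

std::mutex buffer_cache_mutex;
std::mutex texture_cache_mutex;

void Draw() {
    std::scoped_lock lock{buffer_cache_mutex, texture_cache_mutex};
    std::printf("recording a draw with both caches locked\n");
}

void FlushRegion() {
    // A different path may name the mutexes in the opposite order;
    // scoped_lock still cannot deadlock against Draw().
    std::scoped_lock lock{texture_cache_mutex, buffer_cache_mutex};
    std::printf("flushing with both caches locked\n");
}

int main() {
    std::thread a{Draw};
    std::thread b{FlushRegion};
    a.join();
    b.join();
}
```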
| @@ -476,22 +289,29 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 476 | return; | 289 | return; |
| 477 | } | 290 | } |
| 478 | 291 | ||
| 479 | buffer_bindings.Bind(device, scheduler); | ||
| 480 | |||
| 481 | BeginTransformFeedback(); | 292 | BeginTransformFeedback(); |
| 482 | 293 | ||
| 483 | scheduler.RequestRenderpass(framebuffer); | 294 | scheduler.RequestRenderpass(framebuffer); |
| 484 | scheduler.BindGraphicsPipeline(pipeline->GetHandle()); | 295 | scheduler.BindGraphicsPipeline(pipeline->GetHandle()); |
| 485 | UpdateDynamicStates(); | 296 | UpdateDynamicStates(); |
| 486 | 297 | ||
| 487 | const auto pipeline_layout = pipeline->GetLayout(); | 298 | const auto& regs = maxwell3d.regs; |
| 488 | const auto descriptor_set = pipeline->CommitDescriptorSet(); | 299 | const u32 num_instances = maxwell3d.mme_draw.instance_count; |
| 300 | const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); | ||
| 301 | const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); | ||
| 302 | const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); | ||
| 489 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { | 303 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { |
| 490 | if (descriptor_set) { | 304 | if (descriptor_set) { |
| 491 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, | 305 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, |
| 492 | DESCRIPTOR_SET, descriptor_set, {}); | 306 | DESCRIPTOR_SET, descriptor_set, nullptr); |
| 307 | } | ||
| 308 | if (draw_params.is_indexed) { | ||
| 309 | cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, | ||
| 310 | draw_params.base_vertex, draw_params.base_instance); | ||
| 311 | } else { | ||
| 312 | cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, | ||
| 313 | draw_params.base_vertex, draw_params.base_instance); | ||
| 493 | } | 314 | } |
| 494 | draw_params.Draw(cmdbuf); | ||
| 495 | }); | 315 | }); |
| 496 | 316 | ||
| 497 | EndTransformFeedback(); | 317 | EndTransformFeedback(); |
| @@ -515,7 +335,7 @@ void RasterizerVulkan::Clear() { | |||
| 515 | return; | 335 | return; |
| 516 | } | 336 | } |
| 517 | 337 | ||
| 518 | auto lock = texture_cache.AcquireLock(); | 338 | std::scoped_lock lock{texture_cache.mutex}; |
| 519 | texture_cache.UpdateRenderTargets(true); | 339 | texture_cache.UpdateRenderTargets(true); |
| 520 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); | 340 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); |
| 521 | const VkExtent2D render_area = framebuffer->RenderArea(); | 341 | const VkExtent2D render_area = framebuffer->RenderArea(); |
| @@ -559,7 +379,6 @@ void RasterizerVulkan::Clear() { | |||
| 559 | if (use_stencil) { | 379 | if (use_stencil) { |
| 560 | aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; | 380 | aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; |
| 561 | } | 381 | } |
| 562 | |||
| 563 | scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, | 382 | scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, |
| 564 | clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { | 383 | clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { |
| 565 | VkClearAttachment attachment; | 384 | VkClearAttachment attachment; |
| @@ -580,12 +399,11 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 580 | auto& pipeline = pipeline_cache.GetComputePipeline({ | 399 | auto& pipeline = pipeline_cache.GetComputePipeline({ |
| 581 | .shader = code_addr, | 400 | .shader = code_addr, |
| 582 | .shared_memory_size = launch_desc.shared_alloc, | 401 | .shared_memory_size = launch_desc.shared_alloc, |
| 583 | .workgroup_size = | 402 | .workgroup_size{ |
| 584 | { | 403 | launch_desc.block_dim_x, |
| 585 | launch_desc.block_dim_x, | 404 | launch_desc.block_dim_y, |
| 586 | launch_desc.block_dim_y, | 405 | launch_desc.block_dim_z, |
| 587 | launch_desc.block_dim_z, | 406 | }, |
| 588 | }, | ||
| 589 | }); | 407 | }); |
| 590 | 408 | ||
| 591 | // Compute dispatches can't be executed inside a renderpass | 409 | // Compute dispatches can't be executed inside a renderpass |
| @@ -594,10 +412,21 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 594 | image_view_indices.clear(); | 412 | image_view_indices.clear(); |
| 595 | sampler_handles.clear(); | 413 | sampler_handles.clear(); |
| 596 | 414 | ||
| 597 | auto lock = texture_cache.AcquireLock(); | 415 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 598 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 599 | 416 | ||
| 600 | const auto& entries = pipeline.GetEntries(); | 417 | const auto& entries = pipeline.GetEntries(); |
| 418 | buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); | ||
| 419 | buffer_cache.UnbindComputeStorageBuffers(); | ||
| 420 | u32 ssbo_index = 0; | ||
| 421 | for (const auto& buffer : entries.global_buffers) { | ||
| 422 | buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, | ||
| 423 | buffer.is_written); | ||
| 424 | ++ssbo_index; | ||
| 425 | } | ||
| 426 | buffer_cache.UpdateComputeBuffers(); | ||
| 427 | |||
| 428 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 429 | |||
| 601 | SetupComputeUniformTexels(entries); | 430 | SetupComputeUniformTexels(entries); |
| 602 | SetupComputeTextures(entries); | 431 | SetupComputeTextures(entries); |
| 603 | SetupComputeStorageTexels(entries); | 432 | SetupComputeStorageTexels(entries); |
| @@ -606,20 +435,15 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 606 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | 435 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |
| 607 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | 436 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); |
| 608 | 437 | ||
| 609 | buffer_cache.Map(CalculateComputeStreamBufferSize()); | ||
| 610 | |||
| 611 | update_descriptor_queue.Acquire(); | 438 | update_descriptor_queue.Acquire(); |
| 612 | 439 | ||
| 613 | SetupComputeConstBuffers(entries); | 440 | buffer_cache.BindHostComputeBuffers(); |
| 614 | SetupComputeGlobalBuffers(entries); | ||
| 615 | 441 | ||
| 616 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | 442 | ImageViewId* image_view_id_ptr = image_view_ids.data(); |
| 617 | VkSampler* sampler_ptr = sampler_handles.data(); | 443 | VkSampler* sampler_ptr = sampler_handles.data(); |
| 618 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, | 444 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, |
| 619 | sampler_ptr); | 445 | sampler_ptr); |
| 620 | 446 | ||
| 621 | buffer_cache.Unmap(); | ||
| 622 | |||
| 623 | const VkPipeline pipeline_handle = pipeline.GetHandle(); | 447 | const VkPipeline pipeline_handle = pipeline.GetHandle(); |
| 624 | const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); | 448 | const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); |
| 625 | const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); | 449 | const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); |
| @@ -644,6 +468,11 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | |||
| 644 | query_cache.Query(gpu_addr, type, timestamp); | 468 | query_cache.Query(gpu_addr, type, timestamp); |
| 645 | } | 469 | } |
| 646 | 470 | ||
| 471 | void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||
| 472 | u32 size) { | ||
| 473 | buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size); | ||
| 474 | } | ||
| 475 | |||
| 647 | void RasterizerVulkan::FlushAll() {} | 476 | void RasterizerVulkan::FlushAll() {} |
| 648 | 477 | ||
| 649 | void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | 478 | void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { |
| @@ -651,19 +480,23 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | |||
| 651 | return; | 480 | return; |
| 652 | } | 481 | } |
| 653 | { | 482 | { |
| 654 | auto lock = texture_cache.AcquireLock(); | 483 | std::scoped_lock lock{texture_cache.mutex}; |
| 655 | texture_cache.DownloadMemory(addr, size); | 484 | texture_cache.DownloadMemory(addr, size); |
| 656 | } | 485 | } |
| 657 | buffer_cache.FlushRegion(addr, size); | 486 | { |
| 487 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 488 | buffer_cache.DownloadMemory(addr, size); | ||
| 489 | } | ||
| 658 | query_cache.FlushRegion(addr, size); | 490 | query_cache.FlushRegion(addr, size); |
| 659 | } | 491 | } |
| 660 | 492 | ||
| 661 | bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { | 493 | bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { |
| 494 | std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; | ||
| 662 | if (!Settings::IsGPULevelHigh()) { | 495 | if (!Settings::IsGPULevelHigh()) { |
| 663 | return buffer_cache.MustFlushRegion(addr, size); | 496 | return buffer_cache.IsRegionGpuModified(addr, size); |
| 664 | } | 497 | } |
| 665 | return texture_cache.IsRegionGpuModified(addr, size) || | 498 | return texture_cache.IsRegionGpuModified(addr, size) || |
| 666 | buffer_cache.MustFlushRegion(addr, size); | 499 | buffer_cache.IsRegionGpuModified(addr, size); |
| 667 | } | 500 | } |
| 668 | 501 | ||
| 669 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | 502 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { |
| @@ -671,11 +504,14 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | |||
| 671 | return; | 504 | return; |
| 672 | } | 505 | } |
| 673 | { | 506 | { |
| 674 | auto lock = texture_cache.AcquireLock(); | 507 | std::scoped_lock lock{texture_cache.mutex}; |
| 675 | texture_cache.WriteMemory(addr, size); | 508 | texture_cache.WriteMemory(addr, size); |
| 676 | } | 509 | } |
| 510 | { | ||
| 511 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 512 | buffer_cache.WriteMemory(addr, size); | ||
| 513 | } | ||
| 677 | pipeline_cache.InvalidateRegion(addr, size); | 514 | pipeline_cache.InvalidateRegion(addr, size); |
| 678 | buffer_cache.InvalidateRegion(addr, size); | ||
| 679 | query_cache.InvalidateRegion(addr, size); | 515 | query_cache.InvalidateRegion(addr, size); |
| 680 | } | 516 | } |
| 681 | 517 | ||
| @@ -683,25 +519,34 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
| 683 | if (addr == 0 || size == 0) { | 519 | if (addr == 0 || size == 0) { |
| 684 | return; | 520 | return; |
| 685 | } | 521 | } |
| 522 | pipeline_cache.OnCPUWrite(addr, size); | ||
| 686 | { | 523 | { |
| 687 | auto lock = texture_cache.AcquireLock(); | 524 | std::scoped_lock lock{texture_cache.mutex}; |
| 688 | texture_cache.WriteMemory(addr, size); | 525 | texture_cache.WriteMemory(addr, size); |
| 689 | } | 526 | } |
| 690 | pipeline_cache.OnCPUWrite(addr, size); | 527 | { |
| 691 | buffer_cache.OnCPUWrite(addr, size); | 528 | std::scoped_lock lock{buffer_cache.mutex}; |
| 529 | buffer_cache.CachedWriteMemory(addr, size); | ||
| 530 | } | ||
| 692 | } | 531 | } |
| 693 | 532 | ||
| 694 | void RasterizerVulkan::SyncGuestHost() { | 533 | void RasterizerVulkan::SyncGuestHost() { |
| 695 | buffer_cache.SyncGuestHost(); | ||
| 696 | pipeline_cache.SyncGuestHost(); | 534 | pipeline_cache.SyncGuestHost(); |
| 535 | { | ||
| 536 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 537 | buffer_cache.FlushCachedWrites(); | ||
| 538 | } | ||
| 697 | } | 539 | } |
| 698 | 540 | ||
| 699 | void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | 541 | void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { |
| 700 | { | 542 | { |
| 701 | auto lock = texture_cache.AcquireLock(); | 543 | std::scoped_lock lock{texture_cache.mutex}; |
| 702 | texture_cache.UnmapMemory(addr, size); | 544 | texture_cache.UnmapMemory(addr, size); |
| 703 | } | 545 | } |
| 704 | buffer_cache.OnCPUWrite(addr, size); | 546 | { |
| 547 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 548 | buffer_cache.WriteMemory(addr, size); | ||
| 549 | } | ||
| 705 | pipeline_cache.OnCPUWrite(addr, size); | 550 | pipeline_cache.OnCPUWrite(addr, size); |
| 706 | } | 551 | } |
| 707 | 552 | ||
| @@ -774,18 +619,21 @@ void RasterizerVulkan::TickFrame() { | |||
| 774 | draw_counter = 0; | 619 | draw_counter = 0; |
| 775 | update_descriptor_queue.TickFrame(); | 620 | update_descriptor_queue.TickFrame(); |
| 776 | fence_manager.TickFrame(); | 621 | fence_manager.TickFrame(); |
| 777 | buffer_cache.TickFrame(); | ||
| 778 | staging_pool.TickFrame(); | 622 | staging_pool.TickFrame(); |
| 779 | { | 623 | { |
| 780 | auto lock = texture_cache.AcquireLock(); | 624 | std::scoped_lock lock{texture_cache.mutex}; |
| 781 | texture_cache.TickFrame(); | 625 | texture_cache.TickFrame(); |
| 782 | } | 626 | } |
| 627 | { | ||
| 628 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 629 | buffer_cache.TickFrame(); | ||
| 630 | } | ||
| 783 | } | 631 | } |
| 784 | 632 | ||
| 785 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | 633 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 786 | const Tegra::Engines::Fermi2D::Surface& dst, | 634 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 787 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 635 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 788 | auto lock = texture_cache.AcquireLock(); | 636 | std::scoped_lock lock{texture_cache.mutex}; |
| 789 | texture_cache.BlitImage(dst, src, copy_config); | 637 | texture_cache.BlitImage(dst, src, copy_config); |
| 790 | return true; | 638 | return true; |
| 791 | } | 639 | } |
| @@ -795,13 +643,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 795 | if (!framebuffer_addr) { | 643 | if (!framebuffer_addr) { |
| 796 | return false; | 644 | return false; |
| 797 | } | 645 | } |
| 798 | 646 | std::scoped_lock lock{texture_cache.mutex}; | |
| 799 | auto lock = texture_cache.AcquireLock(); | ||
| 800 | ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); | 647 | ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); |
| 801 | if (!image_view) { | 648 | if (!image_view) { |
| 802 | return false; | 649 | return false; |
| 803 | } | 650 | } |
| 804 | |||
| 805 | screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); | 651 | screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); |
| 806 | screen_info.width = image_view->size.width; | 652 | screen_info.width = image_view->size.width; |
| 807 | screen_info.height = image_view->size.height; | 653 | screen_info.height = image_view->size.height; |
| @@ -830,29 +676,8 @@ void RasterizerVulkan::FlushWork() { | |||
| 830 | draw_counter = 0; | 676 | draw_counter = 0; |
| 831 | } | 677 | } |
| 832 | 678 | ||
| 833 | RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, | ||
| 834 | BufferBindings& buffer_bindings, | ||
| 835 | bool is_indexed, | ||
| 836 | bool is_instanced) { | ||
| 837 | MICROPROFILE_SCOPE(Vulkan_Geometry); | ||
| 838 | |||
| 839 | const auto& regs = maxwell3d.regs; | ||
| 840 | |||
| 841 | SetupVertexArrays(buffer_bindings); | ||
| 842 | |||
| 843 | const u32 base_instance = regs.vb_base_instance; | ||
| 844 | const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1; | ||
| 845 | const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first; | ||
| 846 | const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count; | ||
| 847 | |||
| 848 | DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed}; | ||
| 849 | SetupIndexBuffer(buffer_bindings, params, is_indexed); | ||
| 850 | |||
| 851 | return params; | ||
| 852 | } | ||
| 853 | |||
| 854 | void RasterizerVulkan::SetupShaderDescriptors( | 679 | void RasterizerVulkan::SetupShaderDescriptors( |
| 855 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { | 680 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { |
| 856 | image_view_indices.clear(); | 681 | image_view_indices.clear(); |
| 857 | sampler_handles.clear(); | 682 | sampler_handles.clear(); |
| 858 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | 683 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |
| @@ -860,15 +685,27 @@ void RasterizerVulkan::SetupShaderDescriptors( | |||
| 860 | if (!shader) { | 685 | if (!shader) { |
| 861 | continue; | 686 | continue; |
| 862 | } | 687 | } |
| 863 | const auto& entries = shader->GetEntries(); | 688 | const ShaderEntries& entries = shader->GetEntries(); |
| 864 | SetupGraphicsUniformTexels(entries, stage); | 689 | SetupGraphicsUniformTexels(entries, stage); |
| 865 | SetupGraphicsTextures(entries, stage); | 690 | SetupGraphicsTextures(entries, stage); |
| 866 | SetupGraphicsStorageTexels(entries, stage); | 691 | SetupGraphicsStorageTexels(entries, stage); |
| 867 | SetupGraphicsImages(entries, stage); | 692 | SetupGraphicsImages(entries, stage); |
| 693 | |||
| 694 | buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); | ||
| 695 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 696 | u32 ssbo_index = 0; | ||
| 697 | for (const auto& buffer : entries.global_buffers) { | ||
| 698 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, | ||
| 699 | buffer.cbuf_offset, buffer.is_written); | ||
| 700 | ++ssbo_index; | ||
| 701 | } | ||
| 868 | } | 702 | } |
| 869 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | 703 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |
| 704 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 870 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | 705 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); |
| 871 | 706 | ||
| 707 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 708 | |||
| 872 | update_descriptor_queue.Acquire(); | 709 | update_descriptor_queue.Acquire(); |
| 873 | 710 | ||
| 874 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | 711 | ImageViewId* image_view_id_ptr = image_view_ids.data(); |
| @@ -879,11 +716,9 @@ void RasterizerVulkan::SetupShaderDescriptors( | |||
| 879 | if (!shader) { | 716 | if (!shader) { |
| 880 | continue; | 717 | continue; |
| 881 | } | 718 | } |
| 882 | const auto& entries = shader->GetEntries(); | 719 | buffer_cache.BindHostStageBuffers(stage); |
| 883 | SetupGraphicsConstBuffers(entries, stage); | 720 | PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, |
| 884 | SetupGraphicsGlobalBuffers(entries, stage); | 721 | image_view_id_ptr, sampler_ptr); |
| 885 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, | ||
| 886 | sampler_ptr); | ||
| 887 | } | 722 | } |
| 888 | } | 723 | } |
| 889 | 724 | ||
| @@ -916,27 +751,11 @@ void RasterizerVulkan::BeginTransformFeedback() { | |||
| 916 | LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); | 751 | LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); |
| 917 | return; | 752 | return; |
| 918 | } | 753 | } |
| 919 | |||
| 920 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | 754 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || |
| 921 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | 755 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || |
| 922 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | 756 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); |
| 923 | 757 | scheduler.Record( | |
| 924 | UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); | 758 | [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |
| 925 | UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable); | ||
| 926 | UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable); | ||
| 927 | |||
| 928 | const auto& binding = regs.tfb_bindings[0]; | ||
| 929 | UNIMPLEMENTED_IF(binding.buffer_enable == 0); | ||
| 930 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||
| 931 | |||
| 932 | const GPUVAddr gpu_addr = binding.Address(); | ||
| 933 | const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size); | ||
| 934 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||
| 935 | |||
| 936 | scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) { | ||
| 937 | cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); | ||
| 938 | cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); | ||
| 939 | }); | ||
| 940 | } | 759 | } |
| 941 | 760 | ||
| 942 | void RasterizerVulkan::EndTransformFeedback() { | 761 | void RasterizerVulkan::EndTransformFeedback() { |
| @@ -947,104 +766,11 @@ void RasterizerVulkan::EndTransformFeedback() { | |||
| 947 | if (!device.IsExtTransformFeedbackSupported()) { | 766 | if (!device.IsExtTransformFeedbackSupported()) { |
| 948 | return; | 767 | return; |
| 949 | } | 768 | } |
| 950 | |||
| 951 | scheduler.Record( | 769 | scheduler.Record( |
| 952 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | 770 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |
| 953 | } | 771 | } |
| 954 | 772 | ||
| 955 | void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { | ||
| 956 | const auto& regs = maxwell3d.regs; | ||
| 957 | |||
| 958 | for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 959 | const auto& vertex_array = regs.vertex_array[index]; | ||
| 960 | if (!vertex_array.IsEnabled()) { | ||
| 961 | continue; | ||
| 962 | } | ||
| 963 | const GPUVAddr start{vertex_array.StartAddress()}; | ||
| 964 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | ||
| 965 | |||
| 966 | ASSERT(end >= start); | ||
| 967 | const size_t size = end - start; | ||
| 968 | if (size == 0) { | ||
| 969 | buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); | ||
| 970 | continue; | ||
| 971 | } | ||
| 972 | const auto info = buffer_cache.UploadMemory(start, size); | ||
| 973 | buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride); | ||
| 974 | } | ||
| 975 | } | ||
| 976 | |||
| 977 | void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, | ||
| 978 | bool is_indexed) { | ||
| 979 | if (params.num_vertices == 0) { | ||
| 980 | return; | ||
| 981 | } | ||
| 982 | const auto& regs = maxwell3d.regs; | ||
| 983 | switch (regs.draw.topology) { | ||
| 984 | case Maxwell::PrimitiveTopology::Quads: { | ||
| 985 | if (!params.is_indexed) { | ||
| 986 | const auto [buffer, offset] = | ||
| 987 | quad_array_pass.Assemble(params.num_vertices, params.base_vertex); | ||
| 988 | buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); | ||
| 989 | params.base_vertex = 0; | ||
| 990 | params.num_vertices = params.num_vertices * 6 / 4; | ||
| 991 | params.is_indexed = true; | ||
| 992 | break; | ||
| 993 | } | ||
| 994 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | ||
| 995 | const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | ||
| 996 | VkBuffer buffer = info.handle; | ||
| 997 | u64 offset = info.offset; | ||
| 998 | std::tie(buffer, offset) = quad_indexed_pass.Assemble( | ||
| 999 | regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); | ||
| 1000 | |||
| 1001 | buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); | ||
| 1002 | params.num_vertices = (params.num_vertices / 4) * 6; | ||
| 1003 | params.base_vertex = 0; | ||
| 1004 | break; | ||
| 1005 | } | ||
| 1006 | default: { | ||
| 1007 | if (!is_indexed) { | ||
| 1008 | break; | ||
| 1009 | } | ||
| 1010 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | ||
| 1011 | const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | ||
| 1012 | VkBuffer buffer = info.handle; | ||
| 1013 | u64 offset = info.offset; | ||
| 1014 | |||
| 1015 | auto format = regs.index_array.format; | ||
| 1016 | const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; | ||
| 1017 | if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) { | ||
| 1018 | std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset); | ||
| 1019 | format = Maxwell::IndexFormat::UnsignedShort; | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format)); | ||
| 1023 | break; | ||
| 1024 | } | ||
| 1025 | } | ||
| 1026 | } | ||
| 1027 | |||
| 1028 | void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) { | ||
| 1029 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | ||
| 1030 | const auto& shader_stage = maxwell3d.state.shader_stages[stage]; | ||
| 1031 | for (const auto& entry : entries.const_buffers) { | ||
| 1032 | SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]); | ||
| 1033 | } | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) { | ||
| 1037 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | ||
| 1038 | const auto& cbufs{maxwell3d.state.shader_stages[stage]}; | ||
| 1039 | |||
| 1040 | for (const auto& entry : entries.global_buffers) { | ||
| 1041 | const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset(); | ||
| 1042 | SetupGlobalBuffer(entry, addr); | ||
| 1043 | } | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { | 773 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { |
| 1047 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1048 | const auto& regs = maxwell3d.regs; | 774 | const auto& regs = maxwell3d.regs; |
| 1049 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | 775 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1050 | for (const auto& entry : entries.uniform_texels) { | 776 | for (const auto& entry : entries.uniform_texels) { |
| @@ -1054,7 +780,6 @@ void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, | |||
| 1054 | } | 780 | } |
| 1055 | 781 | ||
| 1056 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { | 782 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { |
| 1057 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1058 | const auto& regs = maxwell3d.regs; | 783 | const auto& regs = maxwell3d.regs; |
| 1059 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | 784 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1060 | for (const auto& entry : entries.samplers) { | 785 | for (const auto& entry : entries.samplers) { |
| @@ -1070,7 +795,6 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_ | |||
| 1070 | } | 795 | } |
| 1071 | 796 | ||
| 1072 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { | 797 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { |
| 1073 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1074 | const auto& regs = maxwell3d.regs; | 798 | const auto& regs = maxwell3d.regs; |
| 1075 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | 799 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1076 | for (const auto& entry : entries.storage_texels) { | 800 | for (const auto& entry : entries.storage_texels) { |
| @@ -1080,7 +804,6 @@ void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, | |||
| 1080 | } | 804 | } |
| 1081 | 805 | ||
| 1082 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { | 806 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { |
| 1083 | MICROPROFILE_SCOPE(Vulkan_Images); | ||
| 1084 | const auto& regs = maxwell3d.regs; | 807 | const auto& regs = maxwell3d.regs; |
| 1085 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | 808 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1086 | for (const auto& entry : entries.images) { | 809 | for (const auto& entry : entries.images) { |
| @@ -1089,32 +812,7 @@ void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t | |||
| 1089 | } | 812 | } |
| 1090 | } | 813 | } |
| 1091 | 814 | ||
| 1092 | void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { | ||
| 1093 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | ||
| 1094 | const auto& launch_desc = kepler_compute.launch_description; | ||
| 1095 | for (const auto& entry : entries.const_buffers) { | ||
| 1096 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||
| 1097 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | ||
| 1098 | const Tegra::Engines::ConstBufferInfo info{ | ||
| 1099 | .address = config.Address(), | ||
| 1100 | .size = config.size, | ||
| 1101 | .enabled = mask[entry.GetIndex()], | ||
| 1102 | }; | ||
| 1103 | SetupConstBuffer(entry, info); | ||
| 1104 | } | ||
| 1105 | } | ||
| 1106 | |||
| 1107 | void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { | ||
| 1108 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | ||
| 1109 | const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; | ||
| 1110 | for (const auto& entry : entries.global_buffers) { | ||
| 1111 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | ||
| 1112 | SetupGlobalBuffer(entry, addr); | ||
| 1113 | } | ||
| 1114 | } | ||
| 1115 | |||
| 1116 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { | 815 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { |
| 1117 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1118 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | 816 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1119 | for (const auto& entry : entries.uniform_texels) { | 817 | for (const auto& entry : entries.uniform_texels) { |
| 1120 | const TextureHandle handle = | 818 | const TextureHandle handle = |
| @@ -1124,7 +822,6 @@ void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { | |||
| 1124 | } | 822 | } |
| 1125 | 823 | ||
| 1126 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | 824 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { |
| 1127 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1128 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | 825 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1129 | for (const auto& entry : entries.samplers) { | 826 | for (const auto& entry : entries.samplers) { |
| 1130 | for (size_t index = 0; index < entry.size; ++index) { | 827 | for (size_t index = 0; index < entry.size; ++index) { |
| @@ -1139,7 +836,6 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | |||
| 1139 | } | 836 | } |
| 1140 | 837 | ||
| 1141 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | 838 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { |
| 1142 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1143 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | 839 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1144 | for (const auto& entry : entries.storage_texels) { | 840 | for (const auto& entry : entries.storage_texels) { |
| 1145 | const TextureHandle handle = | 841 | const TextureHandle handle = |
| @@ -1149,7 +845,6 @@ void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | |||
| 1149 | } | 845 | } |
| 1150 | 846 | ||
| 1151 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | 847 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { |
| 1152 | MICROPROFILE_SCOPE(Vulkan_Images); | ||
| 1153 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | 848 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1154 | for (const auto& entry : entries.images) { | 849 | for (const auto& entry : entries.images) { |
| 1155 | const TextureHandle handle = | 850 | const TextureHandle handle = |
| @@ -1158,42 +853,6 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | |||
| 1158 | } | 853 | } |
| 1159 | } | 854 | } |
| 1160 | 855 | ||
| 1161 | void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, | ||
| 1162 | const Tegra::Engines::ConstBufferInfo& buffer) { | ||
| 1163 | if (!buffer.enabled) { | ||
| 1164 | // Set values to zero to unbind buffers | ||
| 1165 | update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); | ||
| 1166 | return; | ||
| 1167 | } | ||
| 1168 | // Align the size to avoid bad std140 interactions | ||
| 1169 | const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); | ||
| 1170 | ASSERT(size <= MaxConstbufferSize); | ||
| 1171 | |||
| 1172 | const u64 alignment = device.GetUniformBufferAlignment(); | ||
| 1173 | const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment); | ||
| 1174 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); | ||
| 1175 | } | ||
| 1176 | |||
| 1177 | void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { | ||
| 1178 | const u64 actual_addr = gpu_memory.Read<u64>(address); | ||
| 1179 | const u32 size = gpu_memory.Read<u32>(address + 8); | ||
| 1180 | |||
| 1181 | if (size == 0) { | ||
| 1182 | // Sometimes global memory pointers don't have a proper size. Upload a dummy entry | ||
| 1183 | // because Vulkan doesn't like empty buffers. | ||
| 1184 | // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the | ||
| 1185 | // default buffer. | ||
| 1186 | static constexpr size_t dummy_size = 4; | ||
| 1187 | const auto info = buffer_cache.GetEmptyBuffer(dummy_size); | ||
| 1188 | update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); | ||
| 1189 | return; | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | const auto info = buffer_cache.UploadMemory( | ||
| 1193 | actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); | ||
| 1194 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); | ||
| 1195 | } | ||
| 1196 | |||
| 1197 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | 856 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 1198 | if (!state_tracker.TouchViewports()) { | 857 | if (!state_tracker.TouchViewports()) { |
| 1199 | return; | 858 | return; |
| @@ -1206,7 +865,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg | |||
| 1206 | GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), | 865 | GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), |
| 1207 | GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), | 866 | GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), |
| 1208 | GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), | 867 | GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), |
| 1209 | GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; | 868 | GetViewportState(device, regs, 14), GetViewportState(device, regs, 15), |
| 869 | }; | ||
| 1210 | scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); | 870 | scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); |
| 1211 | } | 871 | } |
| 1212 | 872 | ||
| @@ -1214,13 +874,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs | |||
| 1214 | if (!state_tracker.TouchScissors()) { | 874 | if (!state_tracker.TouchScissors()) { |
| 1215 | return; | 875 | return; |
| 1216 | } | 876 | } |
| 1217 | const std::array scissors = { | 877 | const std::array scissors{ |
| 1218 | GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), | 878 | GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), |
| 1219 | GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), | 879 | GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), |
| 1220 | GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), | 880 | GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), |
| 1221 | GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), | 881 | GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), |
| 1222 | GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), | 882 | GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), |
| 1223 | GetScissorState(regs, 15)}; | 883 | GetScissorState(regs, 15), |
| 884 | }; | ||
| 1224 | scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); | 885 | scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); |
| 1225 | } | 886 | } |
| 1226 | 887 | ||
| @@ -1385,73 +1046,4 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& | |||
| 1385 | }); | 1046 | }); |
| 1386 | } | 1047 | } |
| 1387 | 1048 | ||
| 1388 | size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { | ||
| 1389 | size_t size = CalculateVertexArraysSize(); | ||
| 1390 | if (is_indexed) { | ||
| 1391 | size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); | ||
| 1392 | } | ||
| 1393 | size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); | ||
| 1394 | return size; | ||
| 1395 | } | ||
| 1396 | |||
| 1397 | size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { | ||
| 1398 | return Tegra::Engines::KeplerCompute::NumConstBuffers * | ||
| 1399 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 1400 | } | ||
| 1401 | |||
| 1402 | size_t RasterizerVulkan::CalculateVertexArraysSize() const { | ||
| 1403 | const auto& regs = maxwell3d.regs; | ||
| 1404 | |||
| 1405 | size_t size = 0; | ||
| 1406 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 1407 | // This implementation assumes that all attributes are used in the shader. | ||
| 1408 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; | ||
| 1409 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | ||
| 1410 | DEBUG_ASSERT(end >= start); | ||
| 1411 | |||
| 1412 | size += (end - start) * regs.vertex_array[index].enable; | ||
| 1413 | } | ||
| 1414 | return size; | ||
| 1415 | } | ||
| 1416 | |||
| 1417 | size_t RasterizerVulkan::CalculateIndexBufferSize() const { | ||
| 1418 | return static_cast<size_t>(maxwell3d.regs.index_array.count) * | ||
| 1419 | static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); | ||
| 1420 | } | ||
| 1421 | |||
| 1422 | size_t RasterizerVulkan::CalculateConstBufferSize( | ||
| 1423 | const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { | ||
| 1424 | if (entry.IsIndirect()) { | ||
| 1425 | // Buffer is accessed indirectly, so upload the entire thing | ||
| 1426 | return buffer.size; | ||
| 1427 | } else { | ||
| 1428 | // Buffer is accessed directly, upload just what we use | ||
| 1429 | return entry.GetSize(); | ||
| 1430 | } | ||
| 1431 | } | ||
| 1432 | |||
| 1433 | VkBuffer RasterizerVulkan::DefaultBuffer() { | ||
| 1434 | if (default_buffer) { | ||
| 1435 | return *default_buffer; | ||
| 1436 | } | ||
| 1437 | default_buffer = device.GetLogical().CreateBuffer({ | ||
| 1438 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 1439 | .pNext = nullptr, | ||
| 1440 | .flags = 0, | ||
| 1441 | .size = DEFAULT_BUFFER_SIZE, | ||
| 1442 | .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | | ||
| 1443 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, | ||
| 1444 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 1445 | .queueFamilyIndexCount = 0, | ||
| 1446 | .pQueueFamilyIndices = nullptr, | ||
| 1447 | }); | ||
| 1448 | default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal); | ||
| 1449 | |||
| 1450 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 1451 | scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) { | ||
| 1452 | cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0); | ||
| 1453 | }); | ||
| 1454 | return *default_buffer; | ||
| 1455 | } | ||
| 1456 | |||
| 1457 | } // namespace Vulkan | 1049 | } // namespace Vulkan |
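The rasterizer hunks above replace the per-draw vertex, index and constant buffer uploads with calls into the shared buffer cache: each stage first declares which uniform and storage buffers it needs, then the cache resolves and binds host buffers once before descriptors are written. A minimal sketch of that call order, assuming simplified stand-in types (BufferCacheStub, StorageBufferUse and the example masks are hypothetical, not the real VideoCommon::BufferCache interface):

    #include <array>
    #include <cstdint>
    #include <cstdio>

    // Hypothetical stand-in for the shared buffer cache; only the call order matters here.
    struct BufferCacheStub {
        void SetEnabledUniformBuffers(size_t stage, uint32_t mask) {
            std::printf("stage %zu: uniform mask %#x\n", stage, static_cast<unsigned>(mask));
        }
        void UnbindGraphicsStorageBuffers(size_t /*stage*/) {}
        void BindGraphicsStorageBuffer(size_t /*stage*/, uint32_t /*ssbo*/, uint32_t /*cbuf*/,
                                       uint32_t /*offset*/, bool /*written*/) {}
        void UpdateGraphicsBuffers(bool is_indexed) {
            std::printf("resolve host buffers (indexed=%d)\n", is_indexed);
        }
        void BindHostGeometryBuffers(bool /*is_indexed*/) {}
        void BindHostStageBuffers(size_t /*stage*/) {}
    };

    // Hypothetical per-shader storage buffer usage record.
    struct StorageBufferUse {
        uint32_t cbuf_index;
        uint32_t cbuf_offset;
        bool is_written;
    };

    int main() {
        constexpr size_t num_stages = 5;
        BufferCacheStub buffer_cache;
        const std::array<uint32_t, num_stages> uniform_masks{0b111, 0, 0, 0, 0b1};
        const std::array<StorageBufferUse, 1> storage_uses{{{3, 0x120, true}}};

        // First pass: declare what each stage needs (mirrors the first loop in SetupShaderDescriptors).
        for (size_t stage = 0; stage < num_stages; ++stage) {
            buffer_cache.SetEnabledUniformBuffers(stage, uniform_masks[stage]);
            buffer_cache.UnbindGraphicsStorageBuffers(stage);
            uint32_t ssbo_index = 0;
            for (const StorageBufferUse& use : storage_uses) {
                buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index++, use.cbuf_index,
                                                       use.cbuf_offset, use.is_written);
            }
        }
        // Then resolve host buffers once, and bind per-stage while writing descriptors.
        const bool is_indexed = true;
        buffer_cache.UpdateGraphicsBuffers(is_indexed);
        buffer_cache.BindHostGeometryBuffers(is_indexed);
        for (size_t stage = 0; stage < num_stages; ++stage) {
            buffer_cache.BindHostStageBuffers(stage);
        }
    }

The split into a declaration pass and a bind pass mirrors the two loops visible in the hunks: bindings are gathered first so the single UpdateGraphicsBuffers call can resolve every host buffer before any descriptor is pushed.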
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 8e261b9bd..7fc6741da 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -18,14 +18,12 @@ | |||
| 18 | #include "video_core/renderer_vulkan/blit_image.h" | 18 | #include "video_core/renderer_vulkan/blit_image.h" |
| 19 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 19 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 20 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 20 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 21 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 22 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 21 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 23 | #include "video_core/renderer_vulkan/vk_fence_manager.h" | 22 | #include "video_core/renderer_vulkan/vk_fence_manager.h" |
| 24 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 23 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 25 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 25 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 27 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 26 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 28 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||
| 29 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 27 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 30 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 28 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 31 | #include "video_core/shader/async_shaders.h" | 29 | #include "video_core/shader/async_shaders.h" |
| @@ -49,7 +47,6 @@ namespace Vulkan { | |||
| 49 | struct VKScreenInfo; | 47 | struct VKScreenInfo; |
| 50 | 48 | ||
| 51 | class StateTracker; | 49 | class StateTracker; |
| 52 | class BufferBindings; | ||
| 53 | 50 | ||
| 54 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { | 51 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { |
| 55 | public: | 52 | public: |
| @@ -65,6 +62,7 @@ public: | |||
| 65 | void DispatchCompute(GPUVAddr code_addr) override; | 62 | void DispatchCompute(GPUVAddr code_addr) override; |
| 66 | void ResetCounter(VideoCore::QueryType type) override; | 63 | void ResetCounter(VideoCore::QueryType type) override; |
| 67 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 64 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 65 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||
| 68 | void FlushAll() override; | 66 | void FlushAll() override; |
| 69 | void FlushRegion(VAddr addr, u64 size) override; | 67 | void FlushRegion(VAddr addr, u64 size) override; |
| 70 | bool MustFlushRegion(VAddr addr, u64 size) override; | 68 | bool MustFlushRegion(VAddr addr, u64 size) override; |
| @@ -107,24 +105,11 @@ private: | |||
| 107 | 105 | ||
| 108 | static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); | 106 | static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); |
| 109 | 107 | ||
| 110 | struct DrawParameters { | ||
| 111 | void Draw(vk::CommandBuffer cmdbuf) const; | ||
| 112 | |||
| 113 | u32 base_instance = 0; | ||
| 114 | u32 num_instances = 0; | ||
| 115 | u32 base_vertex = 0; | ||
| 116 | u32 num_vertices = 0; | ||
| 117 | bool is_indexed = 0; | ||
| 118 | }; | ||
| 119 | |||
| 120 | void FlushWork(); | 108 | void FlushWork(); |
| 121 | 109 | ||
| 122 | /// Setups geometry buffers and state. | ||
| 123 | DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, | ||
| 124 | bool is_indexed, bool is_instanced); | ||
| 125 | |||
| 126 | /// Setup descriptors in the graphics pipeline. | 110 | /// Setup descriptors in the graphics pipeline. |
| 127 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); | 111 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, |
| 112 | bool is_indexed); | ||
| 128 | 113 | ||
| 129 | void UpdateDynamicStates(); | 114 | void UpdateDynamicStates(); |
| 130 | 115 | ||
| @@ -132,16 +117,6 @@ private: | |||
| 132 | 117 | ||
| 133 | void EndTransformFeedback(); | 118 | void EndTransformFeedback(); |
| 134 | 119 | ||
| 135 | void SetupVertexArrays(BufferBindings& buffer_bindings); | ||
| 136 | |||
| 137 | void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); | ||
| 138 | |||
| 139 | /// Setup constant buffers in the graphics pipeline. | ||
| 140 | void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 141 | |||
| 142 | /// Setup global buffers in the graphics pipeline. | ||
| 143 | void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 144 | |||
| 145 | /// Setup uniform texels in the graphics pipeline. | 120 | /// Setup uniform texels in the graphics pipeline. |
| 146 | void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); | 121 | void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); |
| 147 | 122 | ||
| @@ -154,12 +129,6 @@ private: | |||
| 154 | /// Setup images in the graphics pipeline. | 129 | /// Setup images in the graphics pipeline. |
| 155 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); | 130 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); |
| 156 | 131 | ||
| 157 | /// Setup constant buffers in the compute pipeline. | ||
| 158 | void SetupComputeConstBuffers(const ShaderEntries& entries); | ||
| 159 | |||
| 160 | /// Setup global buffers in the compute pipeline. | ||
| 161 | void SetupComputeGlobalBuffers(const ShaderEntries& entries); | ||
| 162 | |||
| 163 | /// Setup texel buffers in the compute pipeline. | 132 | /// Setup texel buffers in the compute pipeline. |
| 164 | void SetupComputeUniformTexels(const ShaderEntries& entries); | 133 | void SetupComputeUniformTexels(const ShaderEntries& entries); |
| 165 | 134 | ||
| @@ -172,11 +141,6 @@ private: | |||
| 172 | /// Setup images in the compute pipeline. | 141 | /// Setup images in the compute pipeline. |
| 173 | void SetupComputeImages(const ShaderEntries& entries); | 142 | void SetupComputeImages(const ShaderEntries& entries); |
| 174 | 143 | ||
| 175 | void SetupConstBuffer(const ConstBufferEntry& entry, | ||
| 176 | const Tegra::Engines::ConstBufferInfo& buffer); | ||
| 177 | |||
| 178 | void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); | ||
| 179 | |||
| 180 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | 144 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 181 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | 145 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 182 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); | 146 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); |
| @@ -193,19 +157,6 @@ private: | |||
| 193 | void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); | 157 | void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); |
| 194 | void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); | 158 | void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); |
| 195 | 159 | ||
| 196 | size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; | ||
| 197 | |||
| 198 | size_t CalculateComputeStreamBufferSize() const; | ||
| 199 | |||
| 200 | size_t CalculateVertexArraysSize() const; | ||
| 201 | |||
| 202 | size_t CalculateIndexBufferSize() const; | ||
| 203 | |||
| 204 | size_t CalculateConstBufferSize(const ConstBufferEntry& entry, | ||
| 205 | const Tegra::Engines::ConstBufferInfo& buffer) const; | ||
| 206 | |||
| 207 | VkBuffer DefaultBuffer(); | ||
| 208 | |||
| 209 | Tegra::GPU& gpu; | 160 | Tegra::GPU& gpu; |
| 210 | Tegra::MemoryManager& gpu_memory; | 161 | Tegra::MemoryManager& gpu_memory; |
| 211 | Tegra::Engines::Maxwell3D& maxwell3d; | 162 | Tegra::Engines::Maxwell3D& maxwell3d; |
| @@ -217,24 +168,19 @@ private: | |||
| 217 | StateTracker& state_tracker; | 168 | StateTracker& state_tracker; |
| 218 | VKScheduler& scheduler; | 169 | VKScheduler& scheduler; |
| 219 | 170 | ||
| 220 | VKStreamBuffer stream_buffer; | ||
| 221 | StagingBufferPool staging_pool; | 171 | StagingBufferPool staging_pool; |
| 222 | VKDescriptorPool descriptor_pool; | 172 | VKDescriptorPool descriptor_pool; |
| 223 | VKUpdateDescriptorQueue update_descriptor_queue; | 173 | VKUpdateDescriptorQueue update_descriptor_queue; |
| 224 | BlitImageHelper blit_image; | 174 | BlitImageHelper blit_image; |
| 225 | QuadArrayPass quad_array_pass; | ||
| 226 | QuadIndexedPass quad_indexed_pass; | ||
| 227 | Uint8Pass uint8_pass; | ||
| 228 | 175 | ||
| 229 | TextureCacheRuntime texture_cache_runtime; | 176 | TextureCacheRuntime texture_cache_runtime; |
| 230 | TextureCache texture_cache; | 177 | TextureCache texture_cache; |
| 178 | BufferCacheRuntime buffer_cache_runtime; | ||
| 179 | BufferCache buffer_cache; | ||
| 231 | VKPipelineCache pipeline_cache; | 180 | VKPipelineCache pipeline_cache; |
| 232 | VKBufferCache buffer_cache; | ||
| 233 | VKQueryCache query_cache; | 181 | VKQueryCache query_cache; |
| 234 | VKFenceManager fence_manager; | 182 | VKFenceManager fence_manager; |
| 235 | 183 | ||
| 236 | vk::Buffer default_buffer; | ||
| 237 | MemoryCommit default_buffer_commit; | ||
| 238 | vk::Event wfi_event; | 184 | vk::Event wfi_event; |
| 239 | VideoCommon::Shader::AsyncShaders async_shaders; | 185 | VideoCommon::Shader::AsyncShaders async_shaders; |
| 240 | 186 | ||
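One detail worth noting in the header above is the member order: buffer_cache_runtime is declared immediately before buffer_cache, just as texture_cache_runtime precedes texture_cache. Non-static data members are constructed in declaration order and destroyed in reverse, so each runtime outlives the cache that refers to it. A reduced sketch of the pattern, with hypothetical Runtime/Cache/Rasterizer types rather than the real ones:

    #include <iostream>

    // Hypothetical runtime/cache pair; the point is only that C++ constructs data members
    // in declaration order, so a runtime declared first is alive before the cache that
    // stores a reference to it, and is still alive when the cache is destroyed.
    struct Runtime {
        Runtime() { std::cout << "runtime up\n"; }
        ~Runtime() { std::cout << "runtime down\n"; }
    };

    struct Cache {
        explicit Cache(Runtime& runtime_) : runtime{runtime_} { std::cout << "cache up\n"; }
        ~Cache() { std::cout << "cache down\n"; }
        Runtime& runtime;
    };

    struct Rasterizer {
        Runtime buffer_cache_runtime;              // declared (and constructed) first
        Cache buffer_cache{buffer_cache_runtime};  // may safely use the runtime
    };

    int main() {
        Rasterizer rasterizer;
        // Prints: runtime up, cache up, cache down, runtime down.
    }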
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 66004f9c0..f35c120b0 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() { | |||
| 52 | worker_thread.join(); | 52 | worker_thread.join(); |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | u64 VKScheduler::CurrentTick() const noexcept { | ||
| 56 | return master_semaphore->CurrentTick(); | ||
| 57 | } | ||
| 58 | |||
| 59 | bool VKScheduler::IsFree(u64 tick) const noexcept { | ||
| 60 | return master_semaphore->IsFree(tick); | ||
| 61 | } | ||
| 62 | |||
| 63 | void VKScheduler::Wait(u64 tick) { | ||
| 64 | master_semaphore->Wait(tick); | ||
| 65 | } | ||
| 66 | |||
| 67 | void VKScheduler::Flush(VkSemaphore semaphore) { | 55 | void VKScheduler::Flush(VkSemaphore semaphore) { |
| 68 | SubmitExecution(semaphore); | 56 | SubmitExecution(semaphore); |
| 69 | AllocateNewContext(); | 57 | AllocateNewContext(); |
| @@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() { | |||
| 269 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | | 257 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | |
| 270 | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | | 258 | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | |
| 271 | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, | 259 | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, |
| 272 | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, | 260 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr, |
| 273 | vk::Span(barriers.data(), num_images)); | 261 | vk::Span(barriers.data(), num_images)); |
| 274 | }); | 262 | }); |
| 275 | state.renderpass = nullptr; | 263 | state.renderpass = nullptr; |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 15f2987eb..3ce48e9d2 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/alignment.h" | 14 | #include "common/alignment.h" |
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "common/threadsafe_queue.h" | 16 | #include "common/threadsafe_queue.h" |
| 17 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | ||
| 17 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 18 | 19 | ||
| 19 | namespace Vulkan { | 20 | namespace Vulkan { |
| @@ -21,7 +22,6 @@ namespace Vulkan { | |||
| 21 | class CommandPool; | 22 | class CommandPool; |
| 22 | class Device; | 23 | class Device; |
| 23 | class Framebuffer; | 24 | class Framebuffer; |
| 24 | class MasterSemaphore; | ||
| 25 | class StateTracker; | 25 | class StateTracker; |
| 26 | class VKQueryCache; | 26 | class VKQueryCache; |
| 27 | 27 | ||
| @@ -32,15 +32,6 @@ public: | |||
| 32 | explicit VKScheduler(const Device& device, StateTracker& state_tracker); | 32 | explicit VKScheduler(const Device& device, StateTracker& state_tracker); |
| 33 | ~VKScheduler(); | 33 | ~VKScheduler(); |
| 34 | 34 | ||
| 35 | /// Returns the current command buffer tick. | ||
| 36 | [[nodiscard]] u64 CurrentTick() const noexcept; | ||
| 37 | |||
| 38 | /// Returns true when a tick has been triggered by the GPU. | ||
| 39 | [[nodiscard]] bool IsFree(u64 tick) const noexcept; | ||
| 40 | |||
| 41 | /// Waits for the given tick to trigger on the GPU. | ||
| 42 | void Wait(u64 tick); | ||
| 43 | |||
| 44 | /// Sends the current execution context to the GPU. | 35 | /// Sends the current execution context to the GPU. |
| 45 | void Flush(VkSemaphore semaphore = nullptr); | 36 | void Flush(VkSemaphore semaphore = nullptr); |
| 46 | 37 | ||
| @@ -82,6 +73,21 @@ public: | |||
| 82 | (void)chunk->Record(command); | 73 | (void)chunk->Record(command); |
| 83 | } | 74 | } |
| 84 | 75 | ||
| 76 | /// Returns the current command buffer tick. | ||
| 77 | [[nodiscard]] u64 CurrentTick() const noexcept { | ||
| 78 | return master_semaphore->CurrentTick(); | ||
| 79 | } | ||
| 80 | |||
| 81 | /// Returns true when a tick has been triggered by the GPU. | ||
| 82 | [[nodiscard]] bool IsFree(u64 tick) const noexcept { | ||
| 83 | return master_semaphore->IsFree(tick); | ||
| 84 | } | ||
| 85 | |||
| 86 | /// Waits for the given tick to trigger on the GPU. | ||
| 87 | void Wait(u64 tick) { | ||
| 88 | master_semaphore->Wait(tick); | ||
| 89 | } | ||
| 90 | |||
| 85 | /// Returns the master timeline semaphore. | 91 | /// Returns the master timeline semaphore. |
| 86 | [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept { | 92 | [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept { |
| 87 | return *master_semaphore; | 93 | return *master_semaphore; |
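The scheduler header now defines CurrentTick, IsFree and Wait inline, which is why the MasterSemaphore forward declaration was replaced with an include: calling a member function inside the header requires the complete type. A compilable sketch of the same shape, where MasterSemaphore is a hypothetical stand-in rather than the real timeline-semaphore wrapper:

    #include <cstdint>
    #include <memory>

    // Hypothetical stand-in: the real class wraps a Vulkan timeline semaphore.
    class MasterSemaphore {
    public:
        uint64_t CurrentTick() const noexcept { return tick; }
        bool IsFree(uint64_t query_tick) const noexcept { return query_tick <= gpu_tick; }
        void Wait(uint64_t /*tick*/) { /* blocks on the semaphore in the real code */ }

    private:
        uint64_t tick = 1;
        uint64_t gpu_tick = 0;
    };

    class Scheduler {
    public:
        // Defined in the class body, these one-line delegates can be inlined at every
        // call site, but that forces the header to see MasterSemaphore's definition.
        uint64_t CurrentTick() const noexcept { return master_semaphore->CurrentTick(); }
        bool IsFree(uint64_t tick) const noexcept { return master_semaphore->IsFree(tick); }
        void Wait(uint64_t tick) { master_semaphore->Wait(tick); }

    private:
        std::unique_ptr<MasterSemaphore> master_semaphore = std::make_unique<MasterSemaphore>();
    };

    int main() {
        Scheduler scheduler;
        return scheduler.IsFree(scheduler.CurrentTick()) ? 1 : 0;
    }

Keeping these delegates in the header lets callers that query ticks frequently, such as the staging pool's GetStreamBuffer further down, avoid an out-of-line call per request.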
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 61d52b961..40e2e0d38 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -3106,7 +3106,11 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 3106 | entries.const_buffers.emplace_back(cbuf.second, cbuf.first); | 3106 | entries.const_buffers.emplace_back(cbuf.second, cbuf.first); |
| 3107 | } | 3107 | } |
| 3108 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | 3108 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { |
| 3109 | entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_written); | 3109 | entries.global_buffers.emplace_back(GlobalBufferEntry{ |
| 3110 | .cbuf_index = base.cbuf_index, | ||
| 3111 | .cbuf_offset = base.cbuf_offset, | ||
| 3112 | .is_written = usage.is_written, | ||
| 3113 | }); | ||
| 3110 | } | 3114 | } |
| 3111 | for (const auto& sampler : ir.GetSamplers()) { | 3115 | for (const auto& sampler : ir.GetSamplers()) { |
| 3112 | if (sampler.is_buffer) { | 3116 | if (sampler.is_buffer) { |
| @@ -3127,6 +3131,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 3127 | entries.attributes.insert(GetGenericAttributeLocation(attribute)); | 3131 | entries.attributes.insert(GetGenericAttributeLocation(attribute)); |
| 3128 | } | 3132 | } |
| 3129 | } | 3133 | } |
| 3134 | for (const auto& buffer : entries.const_buffers) { | ||
| 3135 | entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); | ||
| 3136 | } | ||
| 3130 | entries.clip_distances = ir.GetClipDistances(); | 3137 | entries.clip_distances = ir.GetClipDistances(); |
| 3131 | entries.shader_length = ir.GetLength(); | 3138 | entries.shader_length = ir.GetLength(); |
| 3132 | entries.uses_warps = ir.UsesWarps(); | 3139 | entries.uses_warps = ir.UsesWarps(); |
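The enabled_uniform_buffers field added above is a plain bitmask over constant buffer slots, one bit per bound slot. A small self-contained example of how such a mask is built and consumed; the slot indices below are made up for illustration:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Sketch of the mask built at the end of GenerateShaderEntries: set one bit per used
    // constant buffer slot so disabled slots can be skipped with a single bit test.
    int main() {
        const std::vector<uint32_t> used_const_buffer_indices{0, 1, 4};

        uint32_t enabled_uniform_buffers = 0;
        for (const uint32_t index : used_const_buffer_indices) {
            enabled_uniform_buffers |= 1U << index;
        }
        // 0b10011: slots 0, 1 and 4 are enabled.
        for (uint32_t slot = 0; slot < 8; ++slot) {
            const bool enabled = ((enabled_uniform_buffers >> slot) & 1U) != 0;
            std::printf("slot %u: %s\n", slot, enabled ? "enabled" : "skipped");
        }
    }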
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 26381e444..5d94132a5 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -39,24 +39,7 @@ private: | |||
| 39 | u32 index{}; | 39 | u32 index{}; |
| 40 | }; | 40 | }; |
| 41 | 41 | ||
| 42 | class GlobalBufferEntry { | 42 | struct GlobalBufferEntry { |
| 43 | public: | ||
| 44 | constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_) | ||
| 45 | : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {} | ||
| 46 | |||
| 47 | constexpr u32 GetCbufIndex() const { | ||
| 48 | return cbuf_index; | ||
| 49 | } | ||
| 50 | |||
| 51 | constexpr u32 GetCbufOffset() const { | ||
| 52 | return cbuf_offset; | ||
| 53 | } | ||
| 54 | |||
| 55 | constexpr bool IsWritten() const { | ||
| 56 | return is_written; | ||
| 57 | } | ||
| 58 | |||
| 59 | private: | ||
| 60 | u32 cbuf_index{}; | 43 | u32 cbuf_index{}; |
| 61 | u32 cbuf_offset{}; | 44 | u32 cbuf_offset{}; |
| 62 | bool is_written{}; | 45 | bool is_written{}; |
| @@ -78,6 +61,7 @@ struct ShaderEntries { | |||
| 78 | std::set<u32> attributes; | 61 | std::set<u32> attributes; |
| 79 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 62 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 80 | std::size_t shader_length{}; | 63 | std::size_t shader_length{}; |
| 64 | u32 enabled_uniform_buffers{}; | ||
| 81 | bool uses_warps{}; | 65 | bool uses_warps{}; |
| 82 | }; | 66 | }; |
| 83 | 67 | ||
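Turning GlobalBufferEntry into an aggregate (no constructor, no getters) is what allows the designated-initializer construction shown earlier in GenerateShaderEntries. A reduced stand-alone version of that pattern; the numeric values are examples, not taken from real shaders:

    #include <cstdint>
    #include <vector>

    // Aggregate with in-class default initializers: can be built with designated
    // initializers and read directly, no accessor boilerplate needed.
    struct GlobalBufferEntry {
        uint32_t cbuf_index{};
        uint32_t cbuf_offset{};
        bool is_written{};
    };

    int main() {
        std::vector<GlobalBufferEntry> global_buffers;
        global_buffers.emplace_back(GlobalBufferEntry{
            .cbuf_index = 6,
            .cbuf_offset = 0x2f0,
            .is_written = true,
        });
        return global_buffers.front().is_written ? 0 : 1;
    }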
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 97fd41cc1..dfd8c8e5a 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include <fmt/format.h> | 9 | #include <fmt/format.h> |
| 10 | 10 | ||
| 11 | #include "common/alignment.h" | ||
| 11 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 12 | #include "common/bit_util.h" | 13 | #include "common/bit_util.h" |
| 13 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| @@ -17,18 +18,119 @@ | |||
| 17 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 18 | 19 | ||
| 19 | namespace Vulkan { | 20 | namespace Vulkan { |
| 21 | namespace { | ||
| 22 | // Maximum potential alignment of a Vulkan buffer | ||
| 23 | constexpr VkDeviceSize MAX_ALIGNMENT = 256; | ||
| 24 | // Maximum size to put elements in the stream buffer | ||
| 25 | constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024; | ||
| 26 | // Stream buffer size in bytes | ||
| 27 | constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | ||
| 28 | constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; | ||
| 29 | |||
| 30 | constexpr VkMemoryPropertyFlags HOST_FLAGS = | ||
| 31 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; | ||
| 32 | constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; | ||
| 33 | |||
| 34 | bool IsStreamHeap(VkMemoryHeap heap) noexcept { | ||
| 35 | return STREAM_BUFFER_SIZE < (heap.size * 2) / 3; | ||
| 36 | } | ||
| 37 | |||
| 38 | std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, | ||
| 39 | VkMemoryPropertyFlags flags) noexcept { | ||
| 40 | for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { | ||
| 41 | if (((type_mask >> type_index) & 1) == 0) { | ||
| 42 | // Memory type is incompatible | ||
| 43 | continue; | ||
| 44 | } | ||
| 45 | const VkMemoryType& memory_type = props.memoryTypes[type_index]; | ||
| 46 | if ((memory_type.propertyFlags & flags) != flags) { | ||
| 47 | // Memory type doesn't have the flags we want | ||
| 48 | continue; | ||
| 49 | } | ||
| 50 | if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) { | ||
| 51 | // Memory heap is not suitable for streaming | ||
| 52 | continue; | ||
| 53 | } | ||
| 54 | // Success! | ||
| 55 | return type_index; | ||
| 56 | } | ||
| 57 | return std::nullopt; | ||
| 58 | } | ||
| 59 | |||
| 60 | u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) { | ||
| 61 | // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this | ||
| 62 | std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS); | ||
| 63 | if (type) { | ||
| 64 | return *type; | ||
| 65 | } | ||
| 66 | // Otherwise try without the DEVICE_LOCAL_BIT | ||
| 67 | type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS); | ||
| 68 | if (type) { | ||
| 69 | return *type; | ||
| 70 | } | ||
| 71 | // This should never happen, and in case it does, signal it as an out of memory situation | ||
| 72 | throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); | ||
| 73 | } | ||
| 74 | |||
| 75 | size_t Region(size_t iterator) noexcept { | ||
| 76 | return iterator / REGION_SIZE; | ||
| 77 | } | ||
| 78 | } // Anonymous namespace | ||
| 20 | 79 | ||
| 21 | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, | 80 | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, |
| 22 | VKScheduler& scheduler_) | 81 | VKScheduler& scheduler_) |
| 23 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {} | 82 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { |
| 83 | const vk::Device& dev = device.GetLogical(); | ||
| 84 | stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ | ||
| 85 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 86 | .pNext = nullptr, | ||
| 87 | .flags = 0, | ||
| 88 | .size = STREAM_BUFFER_SIZE, | ||
| 89 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | ||
| 90 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, | ||
| 91 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 92 | .queueFamilyIndexCount = 0, | ||
| 93 | .pQueueFamilyIndices = nullptr, | ||
| 94 | }); | ||
| 95 | if (device.HasDebuggingToolAttached()) { | ||
| 96 | stream_buffer.SetObjectNameEXT("Stream Buffer"); | ||
| 97 | } | ||
| 98 | VkMemoryDedicatedRequirements dedicated_reqs{ | ||
| 99 | .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, | ||
| 100 | .pNext = nullptr, | ||
| 101 | .prefersDedicatedAllocation = VK_FALSE, | ||
| 102 | .requiresDedicatedAllocation = VK_FALSE, | ||
| 103 | }; | ||
| 104 | const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs); | ||
| 105 | const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE || | ||
| 106 | dedicated_reqs.requiresDedicatedAllocation == VK_TRUE; | ||
| 107 | const VkMemoryDedicatedAllocateInfo dedicated_info{ | ||
| 108 | .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, | ||
| 109 | .pNext = nullptr, | ||
| 110 | .image = nullptr, | ||
| 111 | .buffer = *stream_buffer, | ||
| 112 | }; | ||
| 113 | const auto memory_properties = device.GetPhysical().GetMemoryProperties(); | ||
| 114 | stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{ | ||
| 115 | .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | ||
| 116 | .pNext = make_dedicated ? &dedicated_info : nullptr, | ||
| 117 | .allocationSize = requirements.size, | ||
| 118 | .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits), | ||
| 119 | }); | ||
| 120 | if (device.HasDebuggingToolAttached()) { | ||
| 121 | stream_memory.SetObjectNameEXT("Stream Buffer Memory"); | ||
| 122 | } | ||
| 123 | stream_buffer.BindMemory(*stream_memory, 0); | ||
| 124 | stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); | ||
| 125 | } | ||
| 24 | 126 | ||
| 25 | StagingBufferPool::~StagingBufferPool() = default; | 127 | StagingBufferPool::~StagingBufferPool() = default; |
| 26 | 128 | ||
| 27 | StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { | 129 | StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { |
| 28 | if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { | 130 | if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { |
| 29 | return *ref; | 131 | return GetStreamBuffer(size); |
| 30 | } | 132 | } |
| 31 | return CreateStagingBuffer(size, usage); | 133 | return GetStagingBuffer(size, usage); |
| 32 | } | 134 | } |
| 33 | 135 | ||
| 34 | void StagingBufferPool::TickFrame() { | 136 | void StagingBufferPool::TickFrame() { |
| @@ -39,6 +141,51 @@ void StagingBufferPool::TickFrame() { | |||
| 39 | ReleaseCache(MemoryUsage::Download); | 141 | ReleaseCache(MemoryUsage::Download); |
| 40 | } | 142 | } |
| 41 | 143 | ||
| 144 | StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { | ||
| 145 | if (AreRegionsActive(Region(free_iterator) + 1, | ||
| 146 | std::min(Region(iterator + size) + 1, NUM_SYNCS))) { | ||
| 147 | // Avoid waiting for the previous usages to be free | ||
| 148 | return GetStagingBuffer(size, MemoryUsage::Upload); | ||
| 149 | } | ||
| 150 | const u64 current_tick = scheduler.CurrentTick(); | ||
| 151 | std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator), | ||
| 152 | current_tick); | ||
| 153 | used_iterator = iterator; | ||
| 154 | free_iterator = std::max(free_iterator, iterator + size); | ||
| 155 | |||
| 156 | if (iterator + size > STREAM_BUFFER_SIZE) { | ||
| 157 | std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, | ||
| 158 | current_tick); | ||
| 159 | used_iterator = 0; | ||
| 160 | iterator = 0; | ||
| 161 | free_iterator = size; | ||
| 162 | |||
| 163 | if (AreRegionsActive(0, Region(size) + 1)) { | ||
| 164 | // Avoid waiting for the previous usages to be free | ||
| 165 | return GetStagingBuffer(size, MemoryUsage::Upload); | ||
| 166 | } | ||
| 167 | } | ||
| 168 | const size_t offset = iterator; | ||
| 169 | iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); | ||
| 170 | return StagingBufferRef{ | ||
| 171 | .buffer = *stream_buffer, | ||
| 172 | .offset = static_cast<VkDeviceSize>(offset), | ||
| 173 | .mapped_span = std::span<u8>(stream_pointer + offset, size), | ||
| 174 | }; | ||
| 175 | } | ||
| 176 | |||
| 177 | bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const { | ||
| 178 | return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end, | ||
| 179 | [this](u64 sync_tick) { return !scheduler.IsFree(sync_tick); }); | ||
| 180 | }; | ||
| 181 | |||
| 182 | StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) { | ||
| 183 | if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { | ||
| 184 | return *ref; | ||
| 185 | } | ||
| 186 | return CreateStagingBuffer(size, usage); | ||
| 187 | } | ||
| 188 | |||
| 42 | std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, | 189 | std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, |
| 43 | MemoryUsage usage) { | 190 | MemoryUsage usage) { |
| 44 | StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; | 191 | StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; |
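The new stream-buffer path in the staging pool is a ring buffer split into NUM_SYNCS regions, each stamped with the scheduler tick of its last use; a request is served from the ring only when the regions it would touch are already free on the GPU, otherwise it falls back to a dedicated staging buffer. The following reduced model mirrors that bookkeeping with made-up sizes and with the scheduler replaced by explicit current_tick/gpu_tick parameters:

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <optional>

    constexpr size_t STREAM_BUFFER_SIZE = 1024;  // toy size; the real buffer is 128 MiB
    constexpr size_t NUM_SYNCS = 16;
    constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;

    struct StreamRing {
        size_t iterator = 0;       // next free byte
        size_t used_iterator = 0;  // start of bytes recorded in the current submission
        size_t free_iterator = 0;  // highest byte handed out so far
        std::array<uint64_t, NUM_SYNCS> sync_ticks{};

        static size_t Region(size_t offset) { return offset / REGION_SIZE; }

        std::optional<size_t> Allocate(size_t size, uint64_t current_tick, uint64_t gpu_tick) {
            const auto regions_active = [&](size_t begin, size_t end) {
                return std::any_of(sync_ticks.begin() + begin, sync_ticks.begin() + end,
                                   [&](uint64_t tick) { return tick > gpu_tick; });
            };
            if (regions_active(std::min(Region(free_iterator) + 1, NUM_SYNCS),
                               std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
                return std::nullopt;  // caller falls back to a dedicated staging buffer
            }
            // Stamp the regions recorded since the last allocation with the current tick.
            std::fill(sync_ticks.begin() + Region(used_iterator),
                      sync_ticks.begin() + Region(iterator), current_tick);
            used_iterator = iterator;
            free_iterator = std::max(free_iterator, iterator + size);
            if (iterator + size > STREAM_BUFFER_SIZE) {
                // Wrap around: stamp the tail regions, then restart from the beginning.
                std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.end(),
                          current_tick);
                used_iterator = 0;
                iterator = 0;
                free_iterator = size;
                if (regions_active(0, Region(size) + 1)) {
                    return std::nullopt;
                }
            }
            const size_t offset = iterator;
            iterator = iterator + size;  // the real code also aligns up to MAX_ALIGNMENT
            return offset;
        }
    };

    int main() {
        StreamRing ring;
        for (int i = 0; i < 4; ++i) {
            const auto offset = ring.Allocate(300, /*current_tick=*/1, /*gpu_tick=*/0);
            if (offset) {
                std::printf("request %d -> offset %zu\n", i, *offset);
            } else {
                std::printf("request %d -> fallback to staging buffer\n", i);
            }
        }
    }

With the made-up numbers above, the first three 300-byte requests come back at offsets 0, 300 and 600; the fourth would wrap into regions whose ticks are still pending on the GPU, so it reports the fallback path instead of stalling.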
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index d42918a47..69f7618de 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | |||
| @@ -19,11 +19,14 @@ class VKScheduler; | |||
| 19 | 19 | ||
| 20 | struct StagingBufferRef { | 20 | struct StagingBufferRef { |
| 21 | VkBuffer buffer; | 21 | VkBuffer buffer; |
| 22 | VkDeviceSize offset; | ||
| 22 | std::span<u8> mapped_span; | 23 | std::span<u8> mapped_span; |
| 23 | }; | 24 | }; |
| 24 | 25 | ||
| 25 | class StagingBufferPool { | 26 | class StagingBufferPool { |
| 26 | public: | 27 | public: |
| 28 | static constexpr size_t NUM_SYNCS = 16; | ||
| 29 | |||
| 27 | explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, | 30 | explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, |
| 28 | VKScheduler& scheduler); | 31 | VKScheduler& scheduler); |
| 29 | ~StagingBufferPool(); | 32 | ~StagingBufferPool(); |
| @@ -33,6 +36,11 @@ public: | |||
| 33 | void TickFrame(); | 36 | void TickFrame(); |
| 34 | 37 | ||
| 35 | private: | 38 | private: |
| 39 | struct StreamBufferCommit { | ||
| 40 | size_t upper_bound; | ||
| 41 | u64 tick; | ||
| 42 | }; | ||
| 43 | |||
| 36 | struct StagingBuffer { | 44 | struct StagingBuffer { |
| 37 | vk::Buffer buffer; | 45 | vk::Buffer buffer; |
| 38 | MemoryCommit commit; | 46 | MemoryCommit commit; |
| @@ -42,6 +50,7 @@ private: | |||
| 42 | StagingBufferRef Ref() const noexcept { | 50 | StagingBufferRef Ref() const noexcept { |
| 43 | return { | 51 | return { |
| 44 | .buffer = *buffer, | 52 | .buffer = *buffer, |
| 53 | .offset = 0, | ||
| 45 | .mapped_span = mapped_span, | 54 | .mapped_span = mapped_span, |
| 46 | }; | 55 | }; |
| 47 | } | 56 | } |
| @@ -56,6 +65,12 @@ private: | |||
| 56 | static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; | 65 | static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; |
| 57 | using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; | 66 | using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; |
| 58 | 67 | ||
| 68 | StagingBufferRef GetStreamBuffer(size_t size); | ||
| 69 | |||
| 70 | bool AreRegionsActive(size_t region_begin, size_t region_end) const; | ||
| 71 | |||
| 72 | StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage); | ||
| 73 | |||
| 59 | std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); | 74 | std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); |
| 60 | 75 | ||
| 61 | StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); | 76 | StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); |
| @@ -70,6 +85,15 @@ private: | |||
| 70 | MemoryAllocator& memory_allocator; | 85 | MemoryAllocator& memory_allocator; |
| 71 | VKScheduler& scheduler; | 86 | VKScheduler& scheduler; |
| 72 | 87 | ||
| 88 | vk::Buffer stream_buffer; | ||
| 89 | vk::DeviceMemory stream_memory; | ||
| 90 | u8* stream_pointer = nullptr; | ||
| 91 | |||
| 92 | size_t iterator = 0; | ||
| 93 | size_t used_iterator = 0; | ||
| 94 | size_t free_iterator = 0; | ||
| 95 | std::array<u64, NUM_SYNCS> sync_ticks{}; | ||
| 96 | |||
| 73 | StagingBuffersCache device_local_cache; | 97 | StagingBuffersCache device_local_cache; |
| 74 | StagingBuffersCache upload_cache; | 98 | StagingBuffersCache upload_cache; |
| 75 | StagingBuffersCache download_cache; | 99 | StagingBuffersCache download_cache; |
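For requests that cannot be served from the stream buffer, the header above keeps three caches (device-local, upload, download), each an array of NUM_LEVELS buckets indexed by Common::Log2Ceil64(size); a cached buffer records the scheduler tick of its last use and is only handed out again once that tick has been reached. A compact sketch of the bucketing, with std::bit_width standing in for Common::Log2Ceil64 (types and names here are simplified, not the yuzu ones):

    #include <bit>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    constexpr size_t NUM_LEVELS = sizeof(size_t) * 8; // one bucket per power-of-two size

    struct Entry {
        size_t capacity = 0;    // always 1 << level, so any hit is large enough
        uint64_t last_tick = 0; // scheduler tick of the entry's last use
    };

    struct Pool {
        std::vector<Entry> levels[NUM_LEVELS];

        // Log2Ceil: the smallest n with (size_t{1} << n) >= size.
        static size_t Level(size_t size) {
            return size <= 1 ? 0 : static_cast<size_t>(std::bit_width(size - 1));
        }

        Entry* TryReuse(size_t size, uint64_t gpu_tick) {
            for (Entry& entry : levels[Level(size)]) {
                if (entry.last_tick <= gpu_tick) { // the GPU is done with this buffer
                    entry.last_tick = gpu_tick + 1; // mark it as in use again
                    return &entry;
                }
            }
            return nullptr; // nothing reusable; the caller creates a new buffer
        }

        Entry& Create(size_t size, uint64_t gpu_tick) {
            const size_t level = Level(size);
            return levels[level].emplace_back(Entry{size_t{1} << level, gpu_tick + 1});
        }
    };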
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 1779a2e30..e81fad007 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp | |||
| @@ -30,15 +30,18 @@ using Table = Maxwell3D::DirtyState::Table; | |||
| 30 | using Flags = Maxwell3D::DirtyState::Flags; | 30 | using Flags = Maxwell3D::DirtyState::Flags; |
| 31 | 31 | ||
| 32 | Flags MakeInvalidationFlags() { | 32 | Flags MakeInvalidationFlags() { |
| 33 | static constexpr std::array INVALIDATION_FLAGS{ | 33 | static constexpr int INVALIDATION_FLAGS[]{ |
| 34 | Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, | 34 | Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, |
| 35 | StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, | 35 | StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, |
| 36 | DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, | 36 | DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, |
| 37 | }; | 37 | }; |
| 38 | Flags flags{}; | 38 | Flags flags{}; |
| 39 | for (const int flag : INVALIDATION_FLAGS) { | 39 | for (const int flag : INVALIDATION_FLAGS) { |
| 40 | flags[flag] = true; | 40 | flags[flag] = true; |
| 41 | } | 41 | } |
| 42 | for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { | ||
| 43 | flags[index] = true; | ||
| 44 | } | ||
| 42 | return flags; | 45 | return flags; |
| 43 | } | 46 | } |
| 44 | 47 | ||
| @@ -130,7 +133,7 @@ void SetupDirtyStencilTestEnable(Tables& tables) { | |||
| 130 | StateTracker::StateTracker(Tegra::GPU& gpu) | 133 | StateTracker::StateTracker(Tegra::GPU& gpu) |
| 131 | : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { | 134 | : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { |
| 132 | auto& tables = gpu.Maxwell3D().dirty.tables; | 135 | auto& tables = gpu.Maxwell3D().dirty.tables; |
| 133 | SetupDirtyRenderTargets(tables); | 136 | SetupDirtyFlags(tables); |
| 134 | SetupDirtyViewports(tables); | 137 | SetupDirtyViewports(tables); |
| 135 | SetupDirtyScissors(tables); | 138 | SetupDirtyScissors(tables); |
| 136 | SetupDirtyDepthBias(tables); | 139 | SetupDirtyDepthBias(tables); |
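MakeInvalidationFlags above now also raises VertexBuffers plus every per-binding VertexBuffer0..VertexBuffer31 flag, presumably because the reworked buffer cache relies on those dirty bits to re-bind vertex buffers after an invalidation, and the renamed SetupDirtyFlags registers the matching dirty tables. A reduced sketch of how such a mask is assembled, with placeholder indices instead of Maxwell3D's real DirtyState values:

    #include <bitset>
    #include <cstddef>

    // Placeholder indices; the real values come from Maxwell3D::DirtyState.
    enum : std::size_t {
        Viewports,
        Scissors,
        VertexBuffers,
        VertexBuffer0,
        VertexBuffer31 = VertexBuffer0 + 31,
        NumFlags,
    };

    using Flags = std::bitset<NumFlags>;

    Flags MakeInvalidationFlags() {
        static constexpr int INVALIDATION_FLAGS[]{Viewports, Scissors, VertexBuffers};
        Flags flags{};
        for (const int flag : INVALIDATION_FLAGS) {
            flags[flag] = true;
        }
        // Every per-binding vertex buffer flag is invalidated as well.
        for (std::size_t index = VertexBuffer0; index <= VertexBuffer31; ++index) {
            flags[index] = true;
        }
        return flags;
    }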
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 725a2a05d..0b63bd6c8 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp | |||
| @@ -56,8 +56,11 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi | |||
| 56 | 56 | ||
| 57 | } // Anonymous namespace | 57 | } // Anonymous namespace |
| 58 | 58 | ||
| 59 | VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_) | 59 | VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_, |
| 60 | : surface{surface_}, device{device_}, scheduler{scheduler_} {} | 60 | u32 width, u32 height, bool srgb) |
| 61 | : surface{surface_}, device{device_}, scheduler{scheduler_} { | ||
| 62 | Create(width, height, srgb); | ||
| 63 | } | ||
| 61 | 64 | ||
| 62 | VKSwapchain::~VKSwapchain() = default; | 65 | VKSwapchain::~VKSwapchain() = default; |
| 63 | 66 | ||
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 2eadd62b3..a728511e0 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h | |||
| @@ -20,7 +20,8 @@ class VKScheduler; | |||
| 20 | 20 | ||
| 21 | class VKSwapchain { | 21 | class VKSwapchain { |
| 22 | public: | 22 | public: |
| 23 | explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler); | 23 | explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler, |
| 24 | u32 width, u32 height, bool srgb); | ||
| 24 | ~VKSwapchain(); | 25 | ~VKSwapchain(); |
| 25 | 26 | ||
| 26 | /// Creates (or recreates) the swapchain with a given size. | 27 | /// Creates (or recreates) the swapchain with a given size. |
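The swapchain constructor now takes the initial dimensions and sRGB flag and builds the swapchain immediately, removing the separate construct-then-Create step, while Create remains available for recreation on resize or format change. A generic, self-contained illustration of that RAII-style change, with a stand-in class rather than the yuzu types:

    #include <cstdint>
    #include <iostream>

    // Stand-in class illustrating the RAII-style change; not the yuzu VKSwapchain.
    class Swapchain {
    public:
        Swapchain(uint32_t width, uint32_t height, bool srgb) {
            Create(width, height, srgb); // usable immediately after construction
        }

        void Create(uint32_t width, uint32_t height, bool srgb) {
            std::cout << "(re)create " << width << 'x' << height << (srgb ? " srgb" : "") << '\n';
        }
    };

    int main() {
        Swapchain swapchain(1280, 720, false); // no separate construct-then-Create step
        swapchain.Create(1920, 1080, true);    // Create() is still used for recreation
    }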
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index aa7c5d7c6..22a1014a9 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -426,46 +426,47 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 426 | void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, | 426 | void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, |
| 427 | VkImageAspectFlags aspect_mask, bool is_initialized, | 427 | VkImageAspectFlags aspect_mask, bool is_initialized, |
| 428 | std::span<const VkBufferImageCopy> copies) { | 428 | std::span<const VkBufferImageCopy> copies) { |
| 429 | static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | | 429 | static constexpr VkAccessFlags WRITE_ACCESS_FLAGS = |
| 430 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | 430 | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | |
| 431 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; | 431 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; |
| 432 | static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT | | ||
| 433 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | ||
| 434 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; | ||
| 432 | const VkImageMemoryBarrier read_barrier{ | 435 | const VkImageMemoryBarrier read_barrier{ |
| 433 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 436 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 434 | .pNext = nullptr, | 437 | .pNext = nullptr, |
| 435 | .srcAccessMask = ACCESS_FLAGS, | 438 | .srcAccessMask = WRITE_ACCESS_FLAGS, |
| 436 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 439 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 437 | .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, | 440 | .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, |
| 438 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | 441 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| 439 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 442 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 440 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 443 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 441 | .image = image, | 444 | .image = image, |
| 442 | .subresourceRange = | 445 | .subresourceRange{ |
| 443 | { | 446 | .aspectMask = aspect_mask, |
| 444 | .aspectMask = aspect_mask, | 447 | .baseMipLevel = 0, |
| 445 | .baseMipLevel = 0, | 448 | .levelCount = VK_REMAINING_MIP_LEVELS, |
| 446 | .levelCount = VK_REMAINING_MIP_LEVELS, | 449 | .baseArrayLayer = 0, |
| 447 | .baseArrayLayer = 0, | 450 | .layerCount = VK_REMAINING_ARRAY_LAYERS, |
| 448 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | 451 | }, |
| 449 | }, | ||
| 450 | }; | 452 | }; |
| 451 | const VkImageMemoryBarrier write_barrier{ | 453 | const VkImageMemoryBarrier write_barrier{ |
| 452 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 454 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 453 | .pNext = nullptr, | 455 | .pNext = nullptr, |
| 454 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 456 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 455 | .dstAccessMask = ACCESS_FLAGS, | 457 | .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS, |
| 456 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | 458 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| 457 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | 459 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 458 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 460 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 459 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 461 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 460 | .image = image, | 462 | .image = image, |
| 461 | .subresourceRange = | 463 | .subresourceRange{ |
| 462 | { | 464 | .aspectMask = aspect_mask, |
| 463 | .aspectMask = aspect_mask, | 465 | .baseMipLevel = 0, |
| 464 | .baseMipLevel = 0, | 466 | .levelCount = VK_REMAINING_MIP_LEVELS, |
| 465 | .levelCount = VK_REMAINING_MIP_LEVELS, | 467 | .baseArrayLayer = 0, |
| 466 | .baseArrayLayer = 0, | 468 | .layerCount = VK_REMAINING_ARRAY_LAYERS, |
| 467 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | 469 | }, |
| 468 | }, | ||
| 469 | }; | 470 | }; |
| 470 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, | 471 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, |
| 471 | read_barrier); | 472 | read_barrier); |
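The barrier change above splits the old combined ACCESS_FLAGS: the pre-copy barrier only needs previous writes made available before the transfer overwrites the image, while the post-copy barrier must make the transfer write visible to later reads as well as writes. The old code reused the write-only mask for both, so subsequent shader and attachment reads were not ordered against the upload. A conceptual restatement with plain bit flags (the values are arbitrary, not the real VK_ACCESS_* constants):

    #include <cstdint>

    // Arbitrary stand-in bits; not the real VK_ACCESS_* values.
    enum : uint32_t {
        ShaderWrite = 1u << 0, ColorAttachmentWrite = 1u << 1, DepthStencilWrite = 1u << 2,
        ShaderRead  = 1u << 3, ColorAttachmentRead  = 1u << 4, DepthStencilRead  = 1u << 5,
        TransferWrite = 1u << 6,
    };

    constexpr uint32_t WRITE_ACCESS_FLAGS = ShaderWrite | ColorAttachmentWrite | DepthStencilWrite;
    constexpr uint32_t READ_ACCESS_FLAGS  = ShaderRead | ColorAttachmentRead | DepthStencilRead;

    // Pre-copy barrier: flush prior writes before the transfer overwrites the image.
    constexpr uint32_t pre_copy_src_access = WRITE_ACCESS_FLAGS;
    constexpr uint32_t pre_copy_dst_access = TransferWrite;

    // Post-copy barrier: the transfer write must be visible to later reads and writes.
    constexpr uint32_t post_copy_src_access = TransferWrite;
    constexpr uint32_t post_copy_dst_access = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS;

    static_assert((post_copy_dst_access & ShaderRead) != 0,
                  "uploads are now ordered against subsequent shader reads");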
| @@ -569,20 +570,12 @@ void TextureCacheRuntime::Finish() { | |||
| 569 | scheduler.Finish(); | 570 | scheduler.Finish(); |
| 570 | } | 571 | } |
| 571 | 572 | ||
| 572 | ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { | 573 | StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) { |
| 573 | const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload); | 574 | return staging_buffer_pool.Request(size, MemoryUsage::Upload); |
| 574 | return { | ||
| 575 | .handle = staging_ref.buffer, | ||
| 576 | .span = staging_ref.mapped_span, | ||
| 577 | }; | ||
| 578 | } | 575 | } |
| 579 | 576 | ||
| 580 | ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { | 577 | StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { |
| 581 | const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download); | 578 | return staging_buffer_pool.Request(size, MemoryUsage::Download); |
| 582 | return { | ||
| 583 | .handle = staging_ref.buffer, | ||
| 584 | .span = staging_ref.mapped_span, | ||
| 585 | }; | ||
| 586 | } | 579 | } |
| 587 | 580 | ||
| 588 | void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | 581 | void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| @@ -754,7 +747,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, | |||
| 754 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | 747 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | |
| 755 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | 748 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | |
| 756 | VK_ACCESS_TRANSFER_WRITE_BIT, | 749 | VK_ACCESS_TRANSFER_WRITE_BIT, |
| 757 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 750 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, |
| 758 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | 751 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 759 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | 752 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 760 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 753 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| @@ -765,12 +758,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, | |||
| 765 | VkImageMemoryBarrier{ | 758 | VkImageMemoryBarrier{ |
| 766 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 759 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 767 | .pNext = nullptr, | 760 | .pNext = nullptr, |
| 768 | .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | | 761 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | |
| 769 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | ||
| 770 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||
| 771 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||
| 772 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | 762 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | |
| 773 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | 763 | VK_ACCESS_TRANSFER_WRITE_BIT, |
| 774 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 764 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 775 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | 765 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 776 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | 766 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| @@ -828,12 +818,11 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||
| 828 | } | 818 | } |
| 829 | } | 819 | } |
| 830 | 820 | ||
| 831 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 821 | void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { |
| 832 | std::span<const BufferImageCopy> copies) { | ||
| 833 | // TODO: Move this to another API | 822 | // TODO: Move this to another API |
| 834 | scheduler->RequestOutsideRenderPassOperationContext(); | 823 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 835 | std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); | 824 | std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); |
| 836 | const VkBuffer src_buffer = map.handle; | 825 | const VkBuffer src_buffer = map.buffer; |
| 837 | const VkImage vk_image = *image; | 826 | const VkImage vk_image = *image; |
| 838 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; | 827 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; |
| 839 | const bool is_initialized = std::exchange(initialized, true); | 828 | const bool is_initialized = std::exchange(initialized, true); |
| @@ -843,12 +832,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | |||
| 843 | }); | 832 | }); |
| 844 | } | 833 | } |
| 845 | 834 | ||
| 846 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 835 | void Image::UploadMemory(const StagingBufferRef& map, |
| 847 | std::span<const VideoCommon::BufferCopy> copies) { | 836 | std::span<const VideoCommon::BufferCopy> copies) { |
| 848 | // TODO: Move this to another API | 837 | // TODO: Move this to another API |
| 849 | scheduler->RequestOutsideRenderPassOperationContext(); | 838 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 850 | std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); | 839 | std::vector vk_copies = TransformBufferCopies(copies, map.offset); |
| 851 | const VkBuffer src_buffer = map.handle; | 840 | const VkBuffer src_buffer = map.buffer; |
| 852 | const VkBuffer dst_buffer = *buffer; | 841 | const VkBuffer dst_buffer = *buffer; |
| 853 | scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { | 842 | scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { |
| 854 | // TODO: Barriers | 843 | // TODO: Barriers |
| @@ -856,13 +845,57 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | |||
| 856 | }); | 845 | }); |
| 857 | } | 846 | } |
| 858 | 847 | ||
| 859 | void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, | 848 | void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { |
| 860 | std::span<const BufferImageCopy> copies) { | 849 | std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); |
| 861 | std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); | 850 | scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask, |
| 862 | scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask, | ||
| 863 | vk_copies](vk::CommandBuffer cmdbuf) { | 851 | vk_copies](vk::CommandBuffer cmdbuf) { |
| 864 | // TODO: Barriers | 852 | const VkImageMemoryBarrier read_barrier{ |
| 865 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); | 853 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 854 | .pNext = nullptr, | ||
| 855 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 856 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 857 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 858 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||
| 859 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 860 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 861 | .image = image, | ||
| 862 | .subresourceRange{ | ||
| 863 | .aspectMask = aspect_mask, | ||
| 864 | .baseMipLevel = 0, | ||
| 865 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 866 | .baseArrayLayer = 0, | ||
| 867 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 868 | }, | ||
| 869 | }; | ||
| 870 | const VkImageMemoryBarrier image_write_barrier{ | ||
| 871 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 872 | .pNext = nullptr, | ||
| 873 | .srcAccessMask = 0, | ||
| 874 | .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 875 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||
| 876 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 877 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 878 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 879 | .image = image, | ||
| 880 | .subresourceRange{ | ||
| 881 | .aspectMask = aspect_mask, | ||
| 882 | .baseMipLevel = 0, | ||
| 883 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 884 | .baseArrayLayer = 0, | ||
| 885 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 886 | }, | ||
| 887 | }; | ||
| 888 | const VkMemoryBarrier memory_write_barrier{ | ||
| 889 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 890 | .pNext = nullptr, | ||
| 891 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 892 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 893 | }; | ||
| 894 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 895 | 0, read_barrier); | ||
| 896 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies); | ||
| 897 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 898 | 0, memory_write_barrier, nullptr, image_write_barrier); | ||
| 866 | }); | 899 | }); |
| 867 | } | 900 | } |
| 868 | 901 | ||
| @@ -1127,7 +1160,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1127 | .pAttachments = attachments.data(), | 1160 | .pAttachments = attachments.data(), |
| 1128 | .width = key.size.width, | 1161 | .width = key.size.width, |
| 1129 | .height = key.size.height, | 1162 | .height = key.size.height, |
| 1130 | .layers = static_cast<u32>(num_layers), | 1163 | .layers = static_cast<u32>(std::max(num_layers, 1)), |
| 1131 | }); | 1164 | }); |
| 1132 | if (runtime.device.HasDebuggingToolAttached()) { | 1165 | if (runtime.device.HasDebuggingToolAttached()) { |
| 1133 | framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); | 1166 | framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); |
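The framebuffer hunk clamps the layer count so the framebuffer is never requested with zero layers when no attachment reports a layer count. The guard amounts to (helper name here is hypothetical):

    #include <algorithm>
    #include <cstdint>

    // Never request a zero-layer framebuffer, even if no attachment contributed layers.
    uint32_t FramebufferLayers(int num_layers) {
        return static_cast<uint32_t>(std::max(num_layers, 1));
    }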
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 8d29361a1..b08c23459 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <compare> | 7 | #include <compare> |
| 8 | #include <span> | 8 | #include <span> |
| 9 | 9 | ||
| 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||
| 10 | #include "video_core/texture_cache/texture_cache.h" | 11 | #include "video_core/texture_cache/texture_cache.h" |
| 11 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 12 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 12 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| @@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> { | |||
| 53 | 54 | ||
| 54 | namespace Vulkan { | 55 | namespace Vulkan { |
| 55 | 56 | ||
| 56 | struct ImageBufferMap { | ||
| 57 | [[nodiscard]] VkBuffer Handle() const noexcept { | ||
| 58 | return handle; | ||
| 59 | } | ||
| 60 | |||
| 61 | [[nodiscard]] std::span<u8> Span() const noexcept { | ||
| 62 | return span; | ||
| 63 | } | ||
| 64 | |||
| 65 | VkBuffer handle; | ||
| 66 | std::span<u8> span; | ||
| 67 | }; | ||
| 68 | |||
| 69 | struct TextureCacheRuntime { | 57 | struct TextureCacheRuntime { |
| 70 | const Device& device; | 58 | const Device& device; |
| 71 | VKScheduler& scheduler; | 59 | VKScheduler& scheduler; |
| @@ -76,9 +64,9 @@ struct TextureCacheRuntime { | |||
| 76 | 64 | ||
| 77 | void Finish(); | 65 | void Finish(); |
| 78 | 66 | ||
| 79 | [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); | 67 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); |
| 80 | 68 | ||
| 81 | [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size); | 69 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); |
| 82 | 70 | ||
| 83 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | 71 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| 84 | const std::array<Offset2D, 2>& dst_region, | 72 | const std::array<Offset2D, 2>& dst_region, |
| @@ -94,7 +82,7 @@ struct TextureCacheRuntime { | |||
| 94 | return false; | 82 | return false; |
| 95 | } | 83 | } |
| 96 | 84 | ||
| 97 | void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t, | 85 | void AccelerateImageUpload(Image&, const StagingBufferRef&, |
| 98 | std::span<const VideoCommon::SwizzleParameters>) { | 86 | std::span<const VideoCommon::SwizzleParameters>) { |
| 99 | UNREACHABLE(); | 87 | UNREACHABLE(); |
| 100 | } | 88 | } |
| @@ -112,13 +100,12 @@ public: | |||
| 112 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, | 100 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, |
| 113 | VAddr cpu_addr); | 101 | VAddr cpu_addr); |
| 114 | 102 | ||
| 115 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 103 | void UploadMemory(const StagingBufferRef& map, |
| 116 | std::span<const VideoCommon::BufferImageCopy> copies); | 104 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 117 | 105 | ||
| 118 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 106 | void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies); |
| 119 | std::span<const VideoCommon::BufferCopy> copies); | ||
| 120 | 107 | ||
| 121 | void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, | 108 | void DownloadMemory(const StagingBufferRef& map, |
| 122 | std::span<const VideoCommon::BufferImageCopy> copies); | 109 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 123 | 110 | ||
| 124 | [[nodiscard]] VkImage Handle() const noexcept { | 111 | [[nodiscard]] VkImage Handle() const noexcept { |