From 8e56a84566036cfff0aa5c3d80ae1b051d2bd0bf Mon Sep 17 00:00:00 2001
From: Morph
Date: Sun, 23 Apr 2023 00:01:08 -0400
Subject: core_timing: Use CNTPCT as the guest CPU tick
Previously, we were mixing the raw CPU frequency and CNTFRQ.
The raw CPU frequency (1020 MHz) should never have been used, as CNTPCT (whose frequency is CNTFRQ) is the only counter available.
---
src/video_core/gpu.cpp | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
(limited to 'src/video_core')
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 456f733cf..70762c51a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -194,17 +194,17 @@ struct GPU::Impl {
[[nodiscard]] u64 GetTicks() const {
// This values were reversed engineered by fincs from NVN
- // The gpu clock is reported in units of 385/625 nanoseconds
- constexpr u64 gpu_ticks_num = 384;
- constexpr u64 gpu_ticks_den = 625;
+ // The GPU clock is 614.4 MHz
+ using NsToGPUTickRatio = std::ratio<614'400'000, std::nano::den>;
+ static_assert(NsToGPUTickRatio::num == 384 && NsToGPUTickRatio::den == 625);
+
+ u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
- u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count();
if (Settings::values.use_fast_gpu_time.GetValue()) {
nanoseconds /= 256;
}
- const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
- const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
- return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
+
+ return nanoseconds * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
}
[[nodiscard]] bool IsAsync() const {
--
cgit v1.2.3
From 907507886d755fa56099713c4b8f05bb640a8b7d Mon Sep 17 00:00:00 2001
From: Morph
Date: Sun, 28 May 2023 17:45:47 -0400
Subject: (wall, native)_clock: Add GetGPUTick
Allows us to directly calculate the GPU tick without double conversion to and from the host clock tick.
---
src/video_core/gpu.cpp | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
(limited to 'src/video_core')
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 70762c51a..db385076d 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -193,18 +193,13 @@ struct GPU::Impl {
}
[[nodiscard]] u64 GetTicks() const {
- // This values were reversed engineered by fincs from NVN
- // The GPU clock is 614.4 MHz
- using NsToGPUTickRatio = std::ratio<614'400'000, std::nano::den>;
- static_assert(NsToGPUTickRatio::num == 384 && NsToGPUTickRatio::den == 625);
-
- u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
+ u64 gpu_tick = system.CoreTiming().GetGPUTicks();
if (Settings::values.use_fast_gpu_time.GetValue()) {
- nanoseconds /= 256;
+ gpu_tick /= 256;
}
- return nanoseconds * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
+ return gpu_tick;
}
[[nodiscard]] bool IsAsync() const {
--
cgit v1.2.3
From 8d6aefdcc452b602d94a84d13bbbc15f806b689c Mon Sep 17 00:00:00 2001
From: Liam
Date: Wed, 14 Jun 2023 14:11:46 -0400
Subject: video_core: optionally skip barriers on feedback loops
---
src/video_core/texture_cache/texture_cache.h | 4 ++++
1 file changed, 4 insertions(+)
(limited to 'src/video_core')
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c7f7448e9..43b7ac0a6 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -186,6 +186,10 @@ void TextureCache
::FillComputeImageViews(std::span views) {
template
void TextureCache::CheckFeedbackLoop(std::span views) {
+ if (!Settings::values.barrier_feedback_loops.GetValue()) {
+ return;
+ }
+
const bool requires_barrier = [&] {
for (const auto& view : views) {
if (!view.id) {
--
cgit v1.2.3
From 76a676883a17523fb12eeac6f2b9702e4916b2c2 Mon Sep 17 00:00:00 2001
From: FengChen
Date: Sat, 17 Jun 2023 23:26:39 +0800
Subject: video_core: add samples check when find render target
---
src/video_core/texture_cache/texture_cache.h | 22 ++++++++++------------
src/video_core/texture_cache/texture_cache_base.h | 10 ++++------
2 files changed, 14 insertions(+), 18 deletions(-)
(limited to 'src/video_core')
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c7f7448e9..f11998e20 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -280,7 +280,7 @@ void TextureCache::SynchronizeComputeDescriptors() {
}
template
-bool TextureCache::RescaleRenderTargets(bool is_clear) {
+bool TextureCache
::RescaleRenderTargets() {
auto& flags = maxwell3d->dirty.flags;
u32 scale_rating = 0;
bool rescaled = false;
@@ -318,13 +318,13 @@ bool TextureCache
::RescaleRenderTargets(bool is_clear) {
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
if (flags[Dirty::ColorBuffer0 + index] || force) {
flags[Dirty::ColorBuffer0 + index] = false;
- BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
+ BindRenderTarget(&color_buffer_id, FindColorBuffer(index));
}
check_rescale(color_buffer_id, tmp_color_images[index]);
}
if (flags[Dirty::ZetaBuffer] || force) {
flags[Dirty::ZetaBuffer] = false;
- BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
+ BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer());
}
check_rescale(render_targets.depth_buffer_id, tmp_depth_image);
@@ -389,7 +389,7 @@ void TextureCache
::UpdateRenderTargets(bool is_clear) {
return;
}
- const bool rescaled = RescaleRenderTargets(is_clear);
+ const bool rescaled = RescaleRenderTargets();
if (is_rescaling != rescaled) {
flags[Dirty::RescaleViewports] = true;
flags[Dirty::RescaleScissors] = true;
@@ -1658,7 +1658,7 @@ SamplerId TextureCache
::FindSampler(const TSCEntry& config) {
}
template
-ImageViewId TextureCache::FindColorBuffer(size_t index, bool is_clear) {
+ImageViewId TextureCache
::FindColorBuffer(size_t index) {
const auto& regs = maxwell3d->regs;
if (index >= regs.rt_control.count) {
return ImageViewId{};
@@ -1672,11 +1672,11 @@ ImageViewId TextureCache
::FindColorBuffer(size_t index, bool is_clear) {
return ImageViewId{};
}
const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode);
- return FindRenderTargetView(info, gpu_addr, is_clear);
+ return FindRenderTargetView(info, gpu_addr);
}
template
-ImageViewId TextureCache::FindDepthBuffer(bool is_clear) {
+ImageViewId TextureCache
::FindDepthBuffer() {
const auto& regs = maxwell3d->regs;
if (!regs.zeta_enable) {
return ImageViewId{};
@@ -1686,18 +1686,16 @@ ImageViewId TextureCache
::FindDepthBuffer(bool is_clear) {
return ImageViewId{};
}
const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode);
- return FindRenderTargetView(info, gpu_addr, is_clear);
+ return FindRenderTargetView(info, gpu_addr);
}
template
-ImageViewId TextureCache::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
- bool is_clear) {
- const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
+ImageViewId TextureCache
::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr) {
ImageId image_id{};
bool delete_state = has_deleted_images;
do {
has_deleted_images = false;
- image_id = FindOrInsertImage(info, gpu_addr, options);
+ image_id = FindOrInsertImage(info, gpu_addr);
delete_state |= has_deleted_images;
} while (has_deleted_images);
has_deleted_images = delete_state;
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 3bfa92154..c347eccd6 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -166,9 +166,8 @@ public:
void SynchronizeComputeDescriptors();
/// Updates the Render Targets if they can be rescaled
- /// @param is_clear True when the render targets are being used for clears
/// @retval True if the Render Targets have been rescaled.
- bool RescaleRenderTargets(bool is_clear);
+ bool RescaleRenderTargets();
/// Update bound render targets and upload memory if necessary
/// @param is_clear True when the render targets are being used for clears
@@ -324,14 +323,13 @@ private:
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
/// Find or create an image view for the given color buffer index
- [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
+ [[nodiscard]] ImageViewId FindColorBuffer(size_t index);
/// Find or create an image view for the depth buffer
- [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
+ [[nodiscard]] ImageViewId FindDepthBuffer();
/// Find or create a view for a render target with the given image parameters
- [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
- bool is_clear);
+ [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr);
/// Iterates over all the images in a region calling func
template
--
cgit v1.2.3
From 6448eade2ef126a88068cde66b77e7788c3fab08 Mon Sep 17 00:00:00 2001
From: lat9nq
Date: Sun, 18 Jun 2023 04:59:12 -0400
Subject: externals: Add vma and initialize it
video_core: Move vma implementation to library
---
src/video_core/CMakeLists.txt | 2 +-
src/video_core/vulkan_common/vulkan_device.cpp | 23 ++++++++++++++++++++++-
src/video_core/vulkan_common/vulkan_device.h | 3 +++
3 files changed, 26 insertions(+), 2 deletions(-)
(limited to 'src/video_core')
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index bf6439530..e9e6f278d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -291,7 +291,7 @@ target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS})
add_dependencies(video_core host_shaders)
target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
-target_link_libraries(video_core PRIVATE sirit Vulkan::Headers)
+target_link_libraries(video_core PRIVATE sirit Vulkan::Headers vma)
if (ENABLE_NSIGHT_AFTERMATH)
if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK})
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 3d2e9a16a..631d5e378 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -22,6 +22,10 @@
#include
#endif
+#define VMA_STATIC_VULKAN_FUNCTIONS 0
+#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
+#include
+
namespace Vulkan {
using namespace Common::Literals;
namespace {
@@ -592,9 +596,26 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
graphics_queue = logical.GetQueue(graphics_family);
present_queue = logical.GetQueue(present_family);
+
+ const VmaVulkanFunctions functions = {
+ .vkGetInstanceProcAddr = dld.vkGetInstanceProcAddr,
+ .vkGetDeviceProcAddr = dld.vkGetDeviceProcAddr,
+ };
+
+ const VmaAllocatorCreateInfo allocator_info = {
+ .physicalDevice = physical,
+ .device = *logical,
+ .pVulkanFunctions = &functions,
+ .instance = instance,
+ .vulkanApiVersion = VK_API_VERSION_1_1,
+ };
+
+ vk::Check(vmaCreateAllocator(&allocator_info, &allocator));
}
-Device::~Device() = default;
+Device::~Device() {
+ vmaDestroyAllocator(allocator);
+}
VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const {
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index f314d0ffe..123d3b1c4 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -13,6 +13,8 @@
#include "common/settings.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
+VK_DEFINE_HANDLE(VmaAllocator)
+
// Define all features which may be used by the implementation here.
// Vulkan version in the macro describes the minimum version required for feature availability.
// If the Vulkan version is lower than the required version, the named extension is required.
@@ -618,6 +620,7 @@ private:
private:
VkInstance instance; ///< Vulkan instance.
+ VmaAllocator allocator; ///< VMA allocator.
vk::DeviceDispatch dld; ///< Device function pointers.
vk::PhysicalDevice physical; ///< Physical device.
vk::Device logical; ///< Logical device.
--
cgit v1.2.3
From c60eed36b7439a7921ea5e86e1300e96e30c8f8a Mon Sep 17 00:00:00 2001
From: GPUCode
Date: Wed, 24 May 2023 20:32:12 +0300
Subject: memory_allocator: Remove OpenGL interop
* Appears to be unused at the moment
---
src/video_core/renderer_vulkan/renderer_vulkan.cpp | 4 +-
src/video_core/renderer_vulkan/vk_turbo_mode.cpp | 2 +-
.../vulkan_common/vulkan_memory_allocator.cpp | 58 +---------------------
.../vulkan_common/vulkan_memory_allocator.h | 11 ++--
4 files changed, 8 insertions(+), 67 deletions(-)
(limited to 'src/video_core')
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 77128c6e2..5bae8d24f 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -89,8 +89,8 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Settings::values.renderer_debug.GetValue())),
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window.GetWindowInfo())),
- device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
- state_tracker(), scheduler(device, state_tracker),
+ device(CreateDevice(instance, dld, *surface)), memory_allocator(device), state_tracker(),
+ scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
render_window.GetFramebufferLayout().height, false),
present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain,
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
index a802d3c49..6417d7e31 100644
--- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
@@ -18,7 +18,7 @@ using namespace Common::Literals;
TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld)
#ifndef ANDROID
- : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false}
+ : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device}
#endif
{
{
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index e28a556f8..f87c99603 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -6,8 +6,6 @@
#include
#include
-#include
-
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
@@ -54,17 +52,6 @@ struct Range {
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
-constexpr VkExportMemoryAllocateInfo EXPORT_ALLOCATE_INFO{
- .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
- .pNext = nullptr,
-#ifdef _WIN32
- .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
-#elif __unix__
- .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
-#else
- .handleTypes = 0,
-#endif
-};
} // Anonymous namespace
class MemoryAllocation {
@@ -74,14 +61,6 @@ public:
: allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_},
property_flags{properties}, shifted_memory_type{1U << type} {}
-#if defined(_WIN32) || defined(__unix__)
- ~MemoryAllocation() {
- if (owning_opengl_handle != 0) {
- glDeleteMemoryObjectsEXT(1, &owning_opengl_handle);
- }
- }
-#endif
-
MemoryAllocation& operator=(const MemoryAllocation&) = delete;
MemoryAllocation(const MemoryAllocation&) = delete;
@@ -120,31 +99,6 @@ public:
return memory_mapped_span;
}
-#ifdef _WIN32
- [[nodiscard]] u32 ExportOpenGLHandle() {
- if (!owning_opengl_handle) {
- glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
- glImportMemoryWin32HandleEXT(owning_opengl_handle, allocation_size,
- GL_HANDLE_TYPE_OPAQUE_WIN32_EXT,
- memory.GetMemoryWin32HandleKHR());
- }
- return owning_opengl_handle;
- }
-#elif __unix__
- [[nodiscard]] u32 ExportOpenGLHandle() {
- if (!owning_opengl_handle) {
- glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
- glImportMemoryFdEXT(owning_opengl_handle, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT,
- memory.GetMemoryFdKHR());
- }
- return owning_opengl_handle;
- }
-#else
- [[nodiscard]] u32 ExportOpenGLHandle() {
- return 0;
- }
-#endif
-
/// Returns whether this allocation is compatible with the arguments.
[[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const {
return (flags & property_flags) == flags && (type_mask & shifted_memory_type) != 0;
@@ -182,9 +136,6 @@ private:
const u32 shifted_memory_type; ///< Shifted Vulkan memory type.
std::vector commits; ///< All commit ranges done from this allocation.
std::span memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
-#if defined(_WIN32) || defined(__unix__)
- u32 owning_opengl_handle{}; ///< Owning OpenGL memory object handle.
-#endif
};
MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
@@ -216,19 +167,14 @@ std::span MemoryCommit::Map() {
return span;
}
-u32 MemoryCommit::ExportOpenGLHandle() const {
- return allocation->ExportOpenGLHandle();
-}
-
void MemoryCommit::Release() {
if (allocation) {
allocation->Free(begin);
}
}
-MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_)
+MemoryAllocator::MemoryAllocator(const Device& device_)
: device{device_}, properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
- export_allocations{export_allocations_},
buffer_image_granularity{
device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {}
@@ -271,7 +217,7 @@ bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask,
const u32 type = FindType(flags, type_mask).value();
vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = export_allocations ? &EXPORT_ALLOCATE_INFO : nullptr,
+ .pNext = nullptr,
.allocationSize = size,
.memoryTypeIndex = type,
});
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index a5bff03fe..494f30f51 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -41,9 +41,6 @@ public:
/// It will map the backing allocation if it hasn't been mapped before.
std::span Map();
- /// Returns an non-owning OpenGL handle, creating one if it doesn't exist.
- u32 ExportOpenGLHandle() const;
-
/// Returns the Vulkan memory handler.
VkDeviceMemory Memory() const {
return memory;
@@ -74,11 +71,10 @@ public:
* Construct memory allocator
*
* @param device_ Device to allocate from
- * @param export_allocations_ True when allocations have to be exported
*
* @throw vk::Exception on failure
*/
- explicit MemoryAllocator(const Device& device_, bool export_allocations_);
+ explicit MemoryAllocator(const Device& device_);
~MemoryAllocator();
MemoryAllocator& operator=(const MemoryAllocator&) = delete;
@@ -117,9 +113,8 @@ private:
/// Returns index to the fastest memory type compatible with the passed requirements.
std::optional FindType(VkMemoryPropertyFlags flags, u32 type_mask) const;
- const Device& device; ///< Device handle.
- const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
- const bool export_allocations; ///< True when memory allocations have to be exported.
+ const Device& device; ///< Device handle.
+ const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
std::vector> allocations; ///< Current allocations.
VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
// and optimal images
--
cgit v1.2.3
From 48e39756f1ec6e6b0ef48f2444ce38a4e861e898 Mon Sep 17 00:00:00 2001
From: GPUCode
Date: Wed, 24 May 2023 22:39:58 +0300
Subject: renderer_vulkan: Use VMA for images
---
src/video_core/renderer_vulkan/renderer_vulkan.cpp | 3 +-
src/video_core/renderer_vulkan/vk_blit_screen.cpp | 12 +---
src/video_core/renderer_vulkan/vk_blit_screen.h | 2 -
src/video_core/renderer_vulkan/vk_fsr.cpp | 4 +-
src/video_core/renderer_vulkan/vk_fsr.h | 1 -
.../renderer_vulkan/vk_present_manager.cpp | 4 +-
.../renderer_vulkan/vk_present_manager.h | 1 -
src/video_core/renderer_vulkan/vk_smaa.cpp | 27 +++------
src/video_core/renderer_vulkan/vk_smaa.h | 2 -
.../renderer_vulkan/vk_texture_cache.cpp | 14 ++---
src/video_core/renderer_vulkan/vk_texture_cache.h | 2 -
src/video_core/vulkan_common/vulkan_device.h | 5 ++
.../vulkan_common/vulkan_memory_allocator.cpp | 30 +++++++---
.../vulkan_common/vulkan_memory_allocator.h | 5 +-
src/video_core/vulkan_common/vulkan_wrapper.cpp | 28 ++++-----
src/video_core/vulkan_common/vulkan_wrapper.h | 70 ++++++++++++++++++----
16 files changed, 119 insertions(+), 91 deletions(-)
(limited to 'src/video_core')
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 5bae8d24f..e569523b6 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -173,7 +173,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
return;
}
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
- vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{
+ vk::Image staging_image = memory_allocator.CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
@@ -196,7 +196,6 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
- const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal);
const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index acb143fc7..82ca81c7e 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -1071,12 +1071,8 @@ void BlitScreen::ReleaseRawImages() {
scheduler.Wait(tick);
}
raw_images.clear();
- raw_buffer_commits.clear();
-
aa_image_view.reset();
aa_image.reset();
- aa_commit = MemoryCommit{};
-
buffer.reset();
buffer_commit = MemoryCommit{};
}
@@ -1101,13 +1097,12 @@ void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer
void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
raw_images.resize(image_count);
raw_image_views.resize(image_count);
- raw_buffer_commits.resize(image_count);
const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1,
u32 down_shift = 0) {
u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
: VK_IMAGE_USAGE_TRANSFER_DST_BIT;
- return device.GetLogical().CreateImage(VkImageCreateInfo{
+ return memory_allocator.CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -1130,9 +1125,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
};
- const auto create_commit = [&](vk::Image& image) {
- return memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
- };
const auto create_image_view = [&](vk::Image& image, bool used_on_framebuffer = false) {
return device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@@ -1161,7 +1153,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
for (size_t i = 0; i < image_count; ++i) {
raw_images[i] = create_image();
- raw_buffer_commits[i] = create_commit(raw_images[i]);
raw_image_views[i] = create_image_view(raw_images[i]);
}
@@ -1169,7 +1160,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
const u32 up_scale = Settings::values.resolution_info.up_scale;
const u32 down_shift = Settings::values.resolution_info.down_shift;
aa_image = create_image(true, up_scale, down_shift);
- aa_commit = create_commit(aa_image);
aa_image_view = create_image_view(aa_image, true);
VkExtent2D size{
.width = (up_scale * framebuffer.width) >> down_shift,
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 68ec20253..7fcfa9976 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -148,7 +148,6 @@ private:
std::vector raw_images;
std::vector raw_image_views;
- std::vector raw_buffer_commits;
vk::DescriptorPool aa_descriptor_pool;
vk::DescriptorSetLayout aa_descriptor_set_layout;
@@ -159,7 +158,6 @@ private:
vk::DescriptorSets aa_descriptor_sets;
vk::Image aa_image;
vk::ImageView aa_image_view;
- MemoryCommit aa_commit;
u32 raw_width = 0;
u32 raw_height = 0;
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
index df972cd54..9bcdca2fb 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.cpp
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -205,10 +205,9 @@ void FSR::CreateDescriptorSets() {
void FSR::CreateImages() {
images.resize(image_count * 2);
image_views.resize(image_count * 2);
- buffer_commits.resize(image_count * 2);
for (size_t i = 0; i < image_count * 2; ++i) {
- images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
+ images[i] = memory_allocator.CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -231,7 +230,6 @@ void FSR::CreateImages() {
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
- buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal);
image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h
index 5d872861f..8bb9fc23a 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.h
+++ b/src/video_core/renderer_vulkan/vk_fsr.h
@@ -47,7 +47,6 @@ private:
vk::Sampler sampler;
std::vector images;
std::vector image_views;
- std::vector buffer_commits;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp
index 10ace0420..d681bd22a 100644
--- a/src/video_core/renderer_vulkan/vk_present_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp
@@ -181,7 +181,7 @@ void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_
frame->height = height;
frame->is_srgb = is_srgb;
- frame->image = dld.CreateImage({
+ frame->image = memory_allocator.CreateImage({
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
@@ -204,8 +204,6 @@ void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
- frame->image_commit = memory_allocator.Commit(frame->image, MemoryUsage::DeviceLocal);
-
frame->image_view = dld.CreateImageView({
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h
index 4ac2e2395..83e859416 100644
--- a/src/video_core/renderer_vulkan/vk_present_manager.h
+++ b/src/video_core/renderer_vulkan/vk_present_manager.h
@@ -29,7 +29,6 @@ struct Frame {
vk::Image image;
vk::ImageView image_view;
vk::Framebuffer framebuffer;
- MemoryCommit image_commit;
vk::CommandBuffer cmdbuf;
vk::Semaphore render_ready;
vk::Fence present_done;
diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp
index f8735189d..ff7c3a419 100644
--- a/src/video_core/renderer_vulkan/vk_smaa.cpp
+++ b/src/video_core/renderer_vulkan/vk_smaa.cpp
@@ -25,9 +25,7 @@ namespace {
#define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0])))
-std::pair CreateWrappedImage(const Device& device,
- MemoryAllocator& allocator,
- VkExtent2D dimensions, VkFormat format) {
+vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format) {
const VkImageCreateInfo image_ci{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
@@ -46,11 +44,7 @@ std::pair CreateWrappedImage(const Device& device,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
};
-
- auto image = device.GetLogical().CreateImage(image_ci);
- auto commit = allocator.Commit(image, Vulkan::MemoryUsage::DeviceLocal);
-
- return std::make_pair(std::move(image), std::move(commit));
+ return allocator.CreateImage(image_ci);
}
void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout,
@@ -531,10 +525,8 @@ void SMAA::CreateImages() {
static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT};
static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT};
- std::tie(m_static_images[Area], m_static_buffer_commits[Area]) =
- CreateWrappedImage(m_device, m_allocator, area_extent, VK_FORMAT_R8G8_UNORM);
- std::tie(m_static_images[Search], m_static_buffer_commits[Search]) =
- CreateWrappedImage(m_device, m_allocator, search_extent, VK_FORMAT_R8_UNORM);
+ m_static_images[Area] = CreateWrappedImage(m_allocator, area_extent, VK_FORMAT_R8G8_UNORM);
+ m_static_images[Search] = CreateWrappedImage(m_allocator, search_extent, VK_FORMAT_R8_UNORM);
m_static_image_views[Area] =
CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM);
@@ -544,12 +536,11 @@ void SMAA::CreateImages() {
for (u32 i = 0; i < m_image_count; i++) {
Images& images = m_dynamic_images.emplace_back();
- std::tie(images.images[Blend], images.buffer_commits[Blend]) =
- CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
- std::tie(images.images[Edges], images.buffer_commits[Edges]) =
- CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT);
- std::tie(images.images[Output], images.buffer_commits[Output]) =
- CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
+ images.images[Blend] =
+ CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
+ images.images[Edges] = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT);
+ images.images[Output] =
+ CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
images.image_views[Blend] =
CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT);
diff --git a/src/video_core/renderer_vulkan/vk_smaa.h b/src/video_core/renderer_vulkan/vk_smaa.h
index 99a369148..0e214258a 100644
--- a/src/video_core/renderer_vulkan/vk_smaa.h
+++ b/src/video_core/renderer_vulkan/vk_smaa.h
@@ -66,13 +66,11 @@ private:
std::array m_pipelines{};
std::array m_renderpasses{};
- std::array m_static_buffer_commits;
std::array m_static_images{};
std::array m_static_image_views{};
struct Images {
vk::DescriptorSets descriptor_sets{};
- std::array buffer_commits;
std::array images{};
std::array image_views{};
std::array framebuffers{};
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index f025f618b..10defe6cb 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -15,7 +15,6 @@
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
-#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -163,11 +162,12 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
};
}
-[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) {
+[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
+ const ImageInfo& info) {
if (info.type == ImageType::Buffer) {
return vk::Image{};
}
- return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info));
+ return allocator.CreateImage(MakeImageCreateInfo(device, info));
}
[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
@@ -1266,8 +1266,8 @@ void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
- runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)),
- commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)),
+ runtime{&runtime_},
+ original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info)),
aspect_mask(ImageAspectMask(info.format)) {
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
if (Settings::values.async_astc.GetValue()) {
@@ -1467,9 +1467,7 @@ bool Image::ScaleUp(bool ignore) {
auto scaled_info = info;
scaled_info.size.width = scaled_width;
scaled_info.size.height = scaled_height;
- scaled_image = MakeImage(runtime->device, scaled_info);
- auto& allocator = runtime->memory_allocator;
- scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal));
+ scaled_image = MakeImage(runtime->device, runtime->memory_allocator, scaled_info);
ignore = false;
}
current_image = *scaled_image;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index f14525dcb..9481f2531 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -180,12 +180,10 @@ private:
TextureCacheRuntime* runtime{};
vk::Image original_image;
- MemoryCommit commit;
std::vector storage_image_views;
VkImageAspectFlags aspect_mask = 0;
bool initialized = false;
vk::Image scaled_image{};
- MemoryCommit scaled_commit{};
VkImage current_image{};
std::unique_ptr scale_framebuffer;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 123d3b1c4..9936f5658 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -210,6 +210,11 @@ public:
return dld;
}
+ /// Returns the VMA allocator.
+ VmaAllocator GetAllocator() const {
+ return allocator;
+ }
+
/// Returns the logical device.
const vk::Device& GetLogical() const {
return logical;
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index f87c99603..7f860cccd 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -15,6 +15,10 @@
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
+#define VMA_STATIC_VULKAN_FUNCTIONS 0
+#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
+#include <vk_mem_alloc.h>
+
namespace Vulkan {
namespace {
struct Range {
@@ -180,6 +184,24 @@ MemoryAllocator::MemoryAllocator(const Device& device_)
MemoryAllocator::~MemoryAllocator() = default;
+vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
+ const VmaAllocationCreateInfo alloc_info = {
+ .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
+ .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
+ .requiredFlags = 0,
+ .preferredFlags = 0,
+ .pool = VK_NULL_HANDLE,
+ .pUserData = nullptr,
+ };
+
+ VkImage handle{};
+ VmaAllocation allocation{};
+ vk::Check(
+ vmaCreateImage(device.GetAllocator(), &ci, &alloc_info, &handle, &allocation, nullptr));
+ return vk::Image(handle, *device.GetLogical(), device.GetAllocator(), allocation,
+ device.GetDispatchLoader());
+}
+
MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
// Find the fastest memory flags we can afford with the current requirements
const u32 type_mask = requirements.memoryTypeBits;
@@ -205,14 +227,6 @@ MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, MemoryUsage usage
return commit;
}
-MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) {
- VkMemoryRequirements requirements = device.GetLogical().GetImageMemoryRequirements(*image);
- requirements.size = Common::AlignUp(requirements.size, buffer_image_granularity);
- auto commit = Commit(requirements, usage);
- image.BindMemory(commit.Memory(), commit.Offset());
- return commit;
-}
-
bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
const u32 type = FindType(flags, type_mask).value();
vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index 494f30f51..f9ee53cfb 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -80,6 +80,8 @@ public:
MemoryAllocator& operator=(const MemoryAllocator&) = delete;
MemoryAllocator(const MemoryAllocator&) = delete;
+ vk::Image CreateImage(const VkImageCreateInfo& ci) const;
+
/**
* Commits a memory with the specified requirements.
*
@@ -93,9 +95,6 @@ public:
/// Commits memory required by the buffer and binds it.
MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage);
- /// Commits memory required by the image and binds it.
- MemoryCommit Commit(const vk::Image& image, MemoryUsage usage);
-
private:
/// Tries to allocate a chunk of memory.
bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 336f53700..5d088dc58 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -12,6 +12,10 @@
#include "video_core/vulkan_common/vulkan_wrapper.h"
+#define VMA_STATIC_VULKAN_FUNCTIONS 0
+#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
+#include <vk_mem_alloc.h>
+
namespace Vulkan::vk {
namespace {
@@ -547,6 +551,16 @@ DebugUtilsMessenger Instance::CreateDebugUtilsMessenger(
return DebugUtilsMessenger(object, handle, *dld);
}
+void Image::SetObjectNameEXT(const char* name) const {
+ SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name);
+}
+
+void Image::Release() const noexcept {
+ if (handle) {
+ vmaDestroyImage(allocator, handle, allocation);
+ }
+}
+
void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
Check(dld->vkBindBufferMemory(owner, handle, memory, offset));
}
@@ -559,14 +573,6 @@ void BufferView::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name);
}
-void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
- Check(dld->vkBindImageMemory(owner, handle, memory, offset));
-}
-
-void Image::SetObjectNameEXT(const char* name) const {
- SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name);
-}
-
void ImageView::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name);
}
@@ -713,12 +719,6 @@ BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const {
return BufferView(object, handle, *dld);
}
-Image Device::CreateImage(const VkImageCreateInfo& ci) const {
- VkImage object;
- Check(dld->vkCreateImage(handle, &ci, nullptr, &object));
- return Image(object, handle, *dld);
-}
-
ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const {
VkImageView object;
Check(dld->vkCreateImageView(handle, &ci, nullptr, &object));
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 4ff328a21..8ec708774 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -32,6 +32,9 @@
#pragma warning(disable : 26812) // Disable prefer enum class over enum
#endif
+VK_DEFINE_HANDLE(VmaAllocator)
+VK_DEFINE_HANDLE(VmaAllocation)
+
namespace Vulkan::vk {
/**
@@ -616,6 +619,60 @@ public:
}
};
+class Image {
+public:
+ explicit Image(VkImage handle_, VkDevice owner_, VmaAllocator allocator_,
+ VmaAllocation allocation_, const DeviceDispatch& dld_) noexcept
+ : handle{handle_}, owner{owner_}, allocator{allocator_},
+ allocation{allocation_}, dld{&dld_} {}
+ Image() = default;
+
+ Image(const Image&) = delete;
+ Image& operator=(const Image&) = delete;
+
+ Image(Image&& rhs) noexcept
+ : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, allocator{rhs.allocator},
+ allocation{rhs.allocation}, dld{rhs.dld} {}
+
+ Image& operator=(Image&& rhs) noexcept {
+ Release();
+ handle = std::exchange(rhs.handle, nullptr);
+ owner = rhs.owner;
+ allocator = rhs.allocator;
+ allocation = rhs.allocation;
+ dld = rhs.dld;
+ return *this;
+ }
+
+ ~Image() noexcept {
+ Release();
+ }
+
+ VkImage operator*() const noexcept {
+ return handle;
+ }
+
+ void reset() noexcept {
+ Release();
+ handle = nullptr;
+ }
+
+ explicit operator bool() const noexcept {
+ return handle != nullptr;
+ }
+
+ void SetObjectNameEXT(const char* name) const;
+
+private:
+ void Release() const noexcept;
+
+ VkImage handle = nullptr;
+ VkDevice owner = nullptr;
+ VmaAllocator allocator = nullptr;
+ VmaAllocation allocation = nullptr;
+ const DeviceDispatch* dld = nullptr;
+};
+
class Queue {
public:
/// Construct an empty queue handle.
@@ -658,17 +715,6 @@ public:
void SetObjectNameEXT(const char* name) const;
};
-class Image : public Handle {
- using Handle::Handle;
-
-public:
- /// Attaches a memory allocation.
- void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
-
- /// Set object name.
- void SetObjectNameEXT(const char* name) const;
-};
-
class ImageView : public Handle {
using Handle::Handle;
@@ -844,8 +890,6 @@ public:
BufferView CreateBufferView(const VkBufferViewCreateInfo& ci) const;
- Image CreateImage(const VkImageCreateInfo& ci) const;
-
ImageView CreateImageView(const VkImageViewCreateInfo& ci) const;
Semaphore CreateSemaphore() const;
--
cgit v1.2.3
From 7b2f680468bbac206f96b26a1300939be90f5f1b Mon Sep 17 00:00:00 2001
From: GPUCode
Date: Sat, 27 May 2023 17:09:17 +0300
Subject: renderer_vulkan: Use VMA for buffers
---
src/video_core/renderer_vulkan/renderer_vulkan.cpp | 9 +-
src/video_core/renderer_vulkan/vk_blit_screen.cpp | 6 +-
src/video_core/renderer_vulkan/vk_blit_screen.h | 1 -
src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 81 +++++++++-------
src/video_core/renderer_vulkan/vk_buffer_cache.h | 2 -
src/video_core/renderer_vulkan/vk_smaa.cpp | 12 +--
.../renderer_vulkan/vk_staging_buffer_pool.cpp | 105 ++------------------
.../renderer_vulkan/vk_staging_buffer_pool.h | 4 +-
.../renderer_vulkan/vk_texture_cache.cpp | 9 +-
src/video_core/renderer_vulkan/vk_texture_cache.h | 1 -
src/video_core/renderer_vulkan/vk_turbo_mode.cpp | 8 +-
src/video_core/vulkan_common/vulkan_device.cpp | 1 +
.../vulkan_common/vulkan_memory_allocator.cpp | 107 ++++++++++++++++-----
.../vulkan_common/vulkan_memory_allocator.h | 12 ++-
src/video_core/vulkan_common/vulkan_wrapper.cpp | 24 +++--
src/video_core/vulkan_common/vulkan_wrapper.h | 91 +++++++++++++++---
16 files changed, 262 insertions(+), 211 deletions(-)
(limited to 'src/video_core')
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index e569523b6..ddf28ca28 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -233,8 +233,8 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
- const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info);
- MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download);
+ const vk::Buffer dst_buffer =
+ memory_allocator.CreateBuffer(dst_buffer_info, MemoryUsage::Download);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
@@ -308,8 +308,9 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
scheduler.Finish();
// Copy backing image data to the QImage screenshot buffer
- const auto dst_memory_map = dst_buffer_memory.Map();
- std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size());
+ dst_buffer.Invalidate();
+ std::memcpy(renderer_settings.screenshot_bits, dst_buffer.Mapped().data(),
+ dst_buffer.Mapped().size());
renderer_settings.screenshot_complete_callback(false);
renderer_settings.screenshot_requested = false;
}
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 82ca81c7e..ad3b29f0e 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -162,7 +162,7 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
SetUniformData(data, layout);
SetVertexData(data, framebuffer, layout);
- const std::span mapped_span = buffer_commit.Map();
+ const std::span<u8> mapped_span = buffer.Mapped();
std::memcpy(mapped_span.data(), &data, sizeof(data));
if (!use_accelerated) {
@@ -1074,7 +1074,6 @@ void BlitScreen::ReleaseRawImages() {
aa_image_view.reset();
aa_image.reset();
buffer.reset();
- buffer_commit = MemoryCommit{};
}
void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
@@ -1090,8 +1089,7 @@ void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer
.pQueueFamilyIndices = nullptr,
};
- buffer = device.GetLogical().CreateBuffer(ci);
- buffer_commit = memory_allocator.Commit(buffer, MemoryUsage::Upload);
+ buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload);
}
void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 7fcfa9976..8365b5668 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -142,7 +142,6 @@ private:
vk::Sampler sampler;
vk::Buffer buffer;
- MemoryCommit buffer_commit;
std::vector resource_ticks;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 8c33722d3..67356c679 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -50,7 +50,7 @@ size_t BytesPerIndex(VkIndexType index_type) {
}
}
-vk::Buffer CreateBuffer(const Device& device, u64 size) {
+vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allocator, u64 size) {
VkBufferUsageFlags flags =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
@@ -60,7 +60,7 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
if (device.IsExtTransformFeedbackSupported()) {
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
- return device.GetLogical().CreateBuffer({
+ const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -69,7 +69,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
+ };
+ return memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
}
} // Anonymous namespace
@@ -79,8 +80,8 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_),
- device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())},
- commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} {
+ device{&runtime.device}, buffer{
+ CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} {
if (runtime.device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
}
@@ -138,7 +139,7 @@ public:
const u32 num_first_offset_copies = 4;
const size_t bytes_per_index = BytesPerIndex(index_type);
const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
- buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -147,14 +148,21 @@ public:
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
+ };
+ buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
if (device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT("Quad LUT");
}
- memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
- const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
- u8* staging_data = staging.mapped_span.data();
+ const bool host_visible = buffer.IsHostVisible();
+ const StagingBufferRef staging = [&] {
+ if (host_visible) {
+ return StagingBufferRef{};
+ }
+ return staging_pool.Request(size_bytes, MemoryUsage::Upload);
+ }();
+
+ u8* staging_data = host_visible ? buffer.Mapped().data() : staging.mapped_span.data();
const size_t quad_size = bytes_per_index * 6;
for (u32 first = 0; first < num_first_offset_copies; ++first) {
@@ -164,29 +172,33 @@ public:
}
}
- scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
- dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
- const VkBufferCopy copy{
- .srcOffset = src_offset,
- .dstOffset = 0,
- .size = size_bytes,
- };
- const VkBufferMemoryBarrier write_barrier{
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
- .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = dst_buffer,
- .offset = 0,
- .size = size_bytes,
- };
- cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier);
- });
+ if (!host_visible) {
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
+ dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
+ const VkBufferCopy copy{
+ .srcOffset = src_offset,
+ .dstOffset = 0,
+ .size = size_bytes,
+ };
+ const VkBufferMemoryBarrier write_barrier{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = dst_buffer,
+ .offset = 0,
+ .size = size_bytes,
+ };
+ cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier);
+ });
+ } else {
+ buffer.Flush();
+ }
}
void BindBuffer(u32 first) {
@@ -587,11 +599,10 @@ void BufferCacheRuntime::ReserveNullBuffer() {
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
- null_buffer = device.GetLogical().CreateBuffer(create_info);
+ null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
if (device.HasDebuggingToolAttached()) {
null_buffer.SetObjectNameEXT("Null buffer");
}
- null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index cdeef8846..95446c732 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -48,7 +48,6 @@ private:
const Device* device{};
vk::Buffer buffer;
- MemoryCommit commit;
std::vector views;
};
@@ -142,7 +141,6 @@ private:
std::shared_ptr quad_strip_index_buffer;
vk::Buffer null_buffer;
- MemoryCommit null_buffer_commit;
std::unique_ptr uint8_pass;
QuadIndexedPass quad_index_pass;
diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp
index ff7c3a419..5efd7d66e 100644
--- a/src/video_core/renderer_vulkan/vk_smaa.cpp
+++ b/src/video_core/renderer_vulkan/vk_smaa.cpp
@@ -76,7 +76,7 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo
void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler,
vk::Image& image, VkExtent2D dimensions, VkFormat format,
std::span initial_contents = {}) {
- auto upload_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ const VkBufferCreateInfo upload_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -85,9 +85,10 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
- auto upload_commit = allocator.Commit(upload_buffer, MemoryUsage::Upload);
- std::ranges::copy(initial_contents, upload_commit.Map().begin());
+ };
+ auto upload_buffer = allocator.CreateBuffer(upload_ci, MemoryUsage::Upload);
+ std::ranges::copy(initial_contents, upload_buffer.Mapped().begin());
+ upload_buffer.Flush();
const std::array regions{{{
.bufferOffset = 0,
@@ -111,9 +112,6 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
});
scheduler.Finish();
-
- // This should go out of scope before the commit
- auto upload_buffer2 = std::move(upload_buffer);
}
vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) {
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 74ca77216..62b251a9b 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -30,55 +30,6 @@ constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
-constexpr VkMemoryPropertyFlags HOST_FLAGS =
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
-
-bool IsStreamHeap(VkMemoryHeap heap) noexcept {
- return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
-}
-
-std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
- VkMemoryPropertyFlags flags) noexcept {
- for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
- if (((type_mask >> type_index) & 1) == 0) {
- // Memory type is incompatible
- continue;
- }
- const VkMemoryType& memory_type = props.memoryTypes[type_index];
- if ((memory_type.propertyFlags & flags) != flags) {
- // Memory type doesn't have the flags we want
- continue;
- }
- if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
- // Memory heap is not suitable for streaming
- continue;
- }
- // Success!
- return type_index;
- }
- return std::nullopt;
-}
-
-u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
- bool try_device_local) {
- std::optional<u32> type;
- if (try_device_local) {
- // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
- type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
- if (type) {
- return *type;
- }
- }
- // Otherwise try without the DEVICE_LOCAL_BIT
- type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
- if (type) {
- return *type;
- }
- // This should never happen, and in case it does, signal it as an out of memory situation
- throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
-}
-
size_t Region(size_t iterator) noexcept {
return iterator / REGION_SIZE;
}
@@ -87,8 +38,7 @@ size_t Region(size_t iterator) noexcept {
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
- const vk::Device& dev = device.GetLogical();
- stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
+ const VkBufferCreateInfo stream_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -99,46 +49,13 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
- if (device.HasDebuggingToolAttached()) {
- stream_buffer.SetObjectNameEXT("Stream Buffer");
- }
- VkMemoryDedicatedRequirements dedicated_reqs{
- .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
- .pNext = nullptr,
- .prefersDedicatedAllocation = VK_FALSE,
- .requiresDedicatedAllocation = VK_FALSE,
- };
- const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
- const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
- dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
- const VkMemoryDedicatedAllocateInfo dedicated_info{
- .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
- .pNext = nullptr,
- .image = nullptr,
- .buffer = *stream_buffer,
};
- const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties;
- VkMemoryAllocateInfo stream_memory_info{
- .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = make_dedicated ? &dedicated_info : nullptr,
- .allocationSize = requirements.size,
- .memoryTypeIndex =
- FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true),
- };
- stream_memory = dev.TryAllocateMemory(stream_memory_info);
- if (!stream_memory) {
- LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory");
- stream_memory_info.memoryTypeIndex =
- FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false);
- stream_memory = dev.AllocateMemory(stream_memory_info);
- }
-
+ stream_buffer = memory_allocator.CreateBuffer(stream_ci, MemoryUsage::Stream);
if (device.HasDebuggingToolAttached()) {
- stream_memory.SetObjectNameEXT("Stream Buffer Memory");
+ stream_buffer.SetObjectNameEXT("Stream Buffer");
}
- stream_buffer.BindMemory(*stream_memory, 0);
- stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
+ stream_pointer = stream_buffer.Mapped();
+ ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!");
}
StagingBufferPool::~StagingBufferPool() = default;
@@ -199,7 +116,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
return StagingBufferRef{
.buffer = *stream_buffer,
.offset = static_cast<VkDeviceSize>(offset),
- .mapped_span = std::span<u8>(stream_pointer + offset, size),
+ .mapped_span = stream_pointer.subspan(offset, size),
.usage{},
.log2_level{},
.index{},
@@ -247,7 +164,7 @@ std::optional StagingBufferPool::TryGetReservedBuffer(size_t s
StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage,
bool deferred) {
const u32 log2 = Common::Log2Ceil64(size);
- vk::Buffer buffer = device.GetLogical().CreateBuffer({
+ const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -259,17 +176,15 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
+ };
+ vk::Buffer buffer = memory_allocator.CreateBuffer(buffer_ci, usage);
if (device.HasDebuggingToolAttached()) {
++buffer_index;
buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
}
- MemoryCommit commit = memory_allocator.Commit(buffer, usage);
- const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{};
-
+ const std::span<u8> mapped_span = buffer.Mapped();
StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
.buffer = std::move(buffer),
- .commit = std::move(commit),
.mapped_span = mapped_span,
.usage = usage,
.log2_level = log2,
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 4fd15f11a..5f69f08b1 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -46,7 +46,6 @@ private:
struct StagingBuffer {
vk::Buffer buffer;
- MemoryCommit commit;
std::span mapped_span;
MemoryUsage usage;
u32 log2_level;
@@ -97,8 +96,7 @@ private:
Scheduler& scheduler;
vk::Buffer stream_buffer;
- vk::DeviceMemory stream_memory;
- u8* stream_pointer = nullptr;
+ std::span<u8> stream_pointer;
size_t iterator = 0;
size_t used_iterator = 0;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 10defe6cb..28985b6fe 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -839,14 +839,14 @@ bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL);
- if (buffer_commits[level]) {
+ if (buffers[level]) {
return *buffers[level];
}
const auto new_size = Common::NextPow2(needed_size);
static constexpr VkBufferUsageFlags flags =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
- buffers[level] = device.GetLogical().CreateBuffer({
+ const VkBufferCreateInfo temp_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -855,9 +855,8 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
- buffer_commits[level] = std::make_unique<MemoryCommit>(
- memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal));
+ };
+ buffers[level] = memory_allocator.CreateBuffer(temp_ci, MemoryUsage::DeviceLocal);
return *buffers[level];
}
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 9481f2531..220943116 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -116,7 +116,6 @@ public:
static constexpr size_t indexing_slots = 8 * sizeof(size_t);
std::array<vk::Buffer, indexing_slots> buffers{};
- std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{};
};
class Image : public VideoCommon::ImageBase {
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
index 6417d7e31..460d8d59d 100644
--- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
@@ -41,7 +41,7 @@ void TurboMode::Run(std::stop_token stop_token) {
auto& dld = m_device.GetLogical();
// Allocate buffer. 2MiB should be sufficient.
- auto buffer = dld.CreateBuffer(VkBufferCreateInfo{
+ const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -50,10 +50,8 @@ void TurboMode::Run(std::stop_token stop_token) {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
-
- // Commit some device local memory for the buffer.
- auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
+ };
+ vk::Buffer buffer = m_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
// Create the descriptor pool to contain our descriptor.
static constexpr VkDescriptorPoolSize pool_size{
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 631d5e378..541f0c1da 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -603,6 +603,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
};
const VmaAllocatorCreateInfo allocator_info = {
+ .flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT,
.physicalDevice = physical,
.device = *logical,
.pVulkanFunctions = &functions,
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index 7f860cccd..d2e1ef58e 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -51,11 +51,59 @@ struct Range {
case MemoryUsage::Download:
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+ case MemoryUsage::Stream:
+ return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
ASSERT_MSG(false, "Invalid memory usage={}", usage);
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
+[[nodiscard]] VkMemoryPropertyFlags MemoryUsageRequiredVmaFlags(MemoryUsage usage) {
+ switch (usage) {
+ case MemoryUsage::DeviceLocal:
+ return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ case MemoryUsage::Upload:
+ case MemoryUsage::Stream:
+ return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ case MemoryUsage::Download:
+ return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+ }
+ ASSERT_MSG(false, "Invalid memory usage={}", usage);
+ return {};
+}
+
+[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferedVmaFlags(MemoryUsage usage) {
+ return usage != MemoryUsage::DeviceLocal ? VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
+ : VkMemoryPropertyFlags{};
+}
+
+[[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) {
+ switch (usage) {
+ case MemoryUsage::Upload:
+ case MemoryUsage::Stream:
+ return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+ case MemoryUsage::Download:
+ return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
+ case MemoryUsage::DeviceLocal:
+ return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+ VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT;
+ }
+ return {};
+}
+
+[[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) {
+ switch (usage) {
+ case MemoryUsage::DeviceLocal:
+ case MemoryUsage::Stream:
+ return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
+ case MemoryUsage::Upload:
+ case MemoryUsage::Download:
+ return VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
+ }
+ return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
+}
+
} // Anonymous namespace
class MemoryAllocation {
@@ -178,17 +226,18 @@ void MemoryCommit::Release() {
}
MemoryAllocator::MemoryAllocator(const Device& device_)
- : device{device_}, properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
+ : device{device_}, allocator{device.GetAllocator()},
+ properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
buffer_image_granularity{
device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {}
MemoryAllocator::~MemoryAllocator() = default;
vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
- const VmaAllocationCreateInfo alloc_info = {
+ const VmaAllocationCreateInfo alloc_ci = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
- .requiredFlags = 0,
+ .requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
.preferredFlags = 0,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
@@ -196,12 +245,40 @@ vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
VkImage handle{};
VmaAllocation allocation{};
- vk::Check(
- vmaCreateImage(device.GetAllocator(), &ci, &alloc_info, &handle, &allocation, nullptr));
- return vk::Image(handle, *device.GetLogical(), device.GetAllocator(), allocation,
+
+ vk::Check(vmaCreateImage(allocator, &ci, &alloc_ci, &handle, &allocation, nullptr));
+
+ return vk::Image(handle, *device.GetLogical(), allocator, allocation,
device.GetDispatchLoader());
}
+vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const {
+ const VmaAllocationCreateInfo alloc_ci = {
+ .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT |
+ MemoryUsageVmaFlags(usage),
+ .usage = MemoryUsageVma(usage),
+ .requiredFlags = MemoryUsageRequiredVmaFlags(usage),
+ .preferredFlags = MemoryUsagePreferedVmaFlags(usage),
+ .pool = VK_NULL_HANDLE,
+ .pUserData = nullptr,
+ };
+
+ VkBuffer handle{};
+ VmaAllocationInfo alloc_info{};
+ VmaAllocation allocation{};
+ VkMemoryPropertyFlags property_flags{};
+
+ vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info));
+ vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
+
+ u8* data = reinterpret_cast(alloc_info.pMappedData);
+ const std::span mapped_data = data ? std::span{data, ci.size} : std::span{};
+ const bool is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+
+ return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data, is_coherent,
+ device.GetDispatchLoader());
+}
+
MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
// Find the fastest memory flags we can afford with the current requirements
const u32 type_mask = requirements.memoryTypeBits;
@@ -221,12 +298,6 @@ MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, M
return TryCommit(requirements, flags).value();
}
-MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, MemoryUsage usage) {
- auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), usage);
- buffer.BindMemory(commit.Memory(), commit.Offset());
- return commit;
-}
-
bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
const u32 type = FindType(flags, type_mask).value();
vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
@@ -302,16 +373,4 @@ std::optional MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 ty
return std::nullopt;
}
-bool IsHostVisible(MemoryUsage usage) noexcept {
- switch (usage) {
- case MemoryUsage::DeviceLocal:
- return false;
- case MemoryUsage::Upload:
- case MemoryUsage::Download:
- return true;
- }
- ASSERT_MSG(false, "Invalid memory usage={}", usage);
- return false;
-}
-
} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index f9ee53cfb..f449bc8d0 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -9,6 +9,8 @@
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
+VK_DEFINE_HANDLE(VmaAllocator)
+
namespace Vulkan {
class Device;
@@ -17,9 +19,11 @@ class MemoryAllocation;
/// Hints and requirements for the backing memory type of a commit
enum class MemoryUsage {
- DeviceLocal, ///< Hints device local usages, fastest memory type to read and write from the GPU
+ DeviceLocal, ///< Requests device local host visible buffer, falling back to device local
+ ///< memory.
Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads
Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks
+ Stream, ///< Requests device local host visible buffer, falling back host memory.
};
/// Ownership handle of a memory commitment.
@@ -82,6 +86,8 @@ public:
vk::Image CreateImage(const VkImageCreateInfo& ci) const;
+ vk::Buffer CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const;
+
/**
* Commits a memory with the specified requirements.
*
@@ -113,13 +119,11 @@ private:
std::optional FindType(VkMemoryPropertyFlags flags, u32 type_mask) const;
const Device& device; ///< Device handle.
+ VmaAllocator allocator; ///< Vma allocator.
const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
std::vector> allocations; ///< Current allocations.
VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
// and optimal images
};
-/// Returns true when a memory usage is guaranteed to be host visible.
-bool IsHostVisible(MemoryUsage usage) noexcept;
-
} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 5d088dc58..c01a9478e 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -561,14 +561,28 @@ void Image::Release() const noexcept {
}
}
-void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
- Check(dld->vkBindBufferMemory(owner, handle, memory, offset));
+void Buffer::Flush() const {
+ if (!is_coherent) {
+ vmaFlushAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
+ }
+}
+
+void Buffer::Invalidate() const {
+ if (!is_coherent) {
+ vmaInvalidateAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
+ }
}
void Buffer::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name);
}
+void Buffer::Release() const noexcept {
+ if (handle) {
+ vmaDestroyBuffer(allocator, handle, allocation);
+ }
+}
+
void BufferView::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name);
}
@@ -707,12 +721,6 @@ Queue Device::GetQueue(u32 family_index) const noexcept {
return Queue(queue, *dld);
}
-Buffer Device::CreateBuffer(const VkBufferCreateInfo& ci) const {
- VkBuffer object;
- Check(dld->vkCreateBuffer(handle, &ci, nullptr, &object));
- return Buffer(object, handle, *dld);
-}
-
BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const {
VkBufferView object;
Check(dld->vkCreateBufferView(handle, &ci, nullptr, &object));
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 8ec708774..44fce47a5 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -673,6 +673,84 @@ private:
const DeviceDispatch* dld = nullptr;
};
+class Buffer {
+public:
+ explicit Buffer(VkBuffer handle_, VkDevice owner_, VmaAllocator allocator_,
+ VmaAllocation allocation_, std::span mapped_, bool is_coherent_,
+ const DeviceDispatch& dld_) noexcept
+ : handle{handle_}, owner{owner_}, allocator{allocator_},
+ allocation{allocation_}, mapped{mapped_}, is_coherent{is_coherent_}, dld{&dld_} {}
+ Buffer() = default;
+
+ Buffer(const Buffer&) = delete;
+ Buffer& operator=(const Buffer&) = delete;
+
+ Buffer(Buffer&& rhs) noexcept
+ : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, allocator{rhs.allocator},
+ allocation{rhs.allocation}, mapped{rhs.mapped},
+ is_coherent{rhs.is_coherent}, dld{rhs.dld} {}
+
+ Buffer& operator=(Buffer&& rhs) noexcept {
+ Release();
+ handle = std::exchange(rhs.handle, nullptr);
+ owner = rhs.owner;
+ allocator = rhs.allocator;
+ allocation = rhs.allocation;
+ mapped = rhs.mapped;
+ is_coherent = rhs.is_coherent;
+ dld = rhs.dld;
+ return *this;
+ }
+
+ ~Buffer() noexcept {
+ Release();
+ }
+
+ VkBuffer operator*() const noexcept {
+ return handle;
+ }
+
+ void reset() noexcept {
+ Release();
+ handle = nullptr;
+ }
+
+ explicit operator bool() const noexcept {
+ return handle != nullptr;
+ }
+
+ /// Returns the host mapped memory, an empty span otherwise.
+ std::span Mapped() noexcept {
+ return mapped;
+ }
+
+ std::span Mapped() const noexcept {
+ return mapped;
+ }
+
+ /// Returns true if the buffer is mapped to the host.
+ bool IsHostVisible() const noexcept {
+ return !mapped.empty();
+ }
+
+ void Flush() const;
+
+ void Invalidate() const;
+
+ void SetObjectNameEXT(const char* name) const;
+
+private:
+ void Release() const noexcept;
+
+ VkBuffer handle = nullptr;
+ VkDevice owner = nullptr;
+ VmaAllocator allocator = nullptr;
+ VmaAllocation allocation = nullptr;
+ std::span mapped = {};
+ bool is_coherent = false;
+ const DeviceDispatch* dld = nullptr;
+};
+
class Queue {
public:
/// Construct an empty queue handle.
@@ -696,17 +774,6 @@ private:
const DeviceDispatch* dld = nullptr;
};
-class Buffer : public Handle {
- using Handle::Handle;
-
-public:
- /// Attaches a memory allocation.
- void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
-
- /// Set object name.
- void SetObjectNameEXT(const char* name) const;
-};
-
class BufferView : public Handle {
using Handle::Handle;
@@ -886,8 +953,6 @@ public:
Queue GetQueue(u32 family_index) const noexcept;
- Buffer CreateBuffer(const VkBufferCreateInfo& ci) const;
-
BufferView CreateBufferView(const VkBufferViewCreateInfo& ci) const;
ImageView CreateImageView(const VkImageViewCreateInfo& ci) const;
--
cgit v1.2.3
From ee0d68300e68a221d9930935f26d0c96be79357b Mon Sep 17 00:00:00 2001
From: GPUCode
Date: Sun, 18 Jun 2023 12:27:31 +0300
Subject: renderer_vulkan: Add missing initializers
---
src/video_core/vulkan_common/vulkan_device.cpp | 12 ++++++++----
src/video_core/vulkan_common/vulkan_memory_allocator.cpp | 6 +++++-
2 files changed, 13 insertions(+), 5 deletions(-)
(limited to 'src/video_core')
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 541f0c1da..94dd1aa14 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -597,18 +597,22 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
graphics_queue = logical.GetQueue(graphics_family);
present_queue = logical.GetQueue(present_family);
- const VmaVulkanFunctions functions = {
- .vkGetInstanceProcAddr = dld.vkGetInstanceProcAddr,
- .vkGetDeviceProcAddr = dld.vkGetDeviceProcAddr,
- };
+ VmaVulkanFunctions functions{};
+ functions.vkGetInstanceProcAddr = dld.vkGetInstanceProcAddr;
+ functions.vkGetDeviceProcAddr = dld.vkGetDeviceProcAddr;
const VmaAllocatorCreateInfo allocator_info = {
.flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT,
.physicalDevice = physical,
.device = *logical,
+ .preferredLargeHeapBlockSize = 0,
+ .pAllocationCallbacks = nullptr,
+ .pDeviceMemoryCallbacks = nullptr,
+ .pHeapSizeLimit = nullptr,
.pVulkanFunctions = &functions,
.instance = instance,
.vulkanApiVersion = VK_API_VERSION_1_1,
+ .pTypeExternalMemoryHandleTypes = nullptr,
};
vk::Check(vmaCreateAllocator(&allocator_info, &allocator));
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index d2e1ef58e..20d36680c 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -75,7 +75,7 @@ struct Range {
[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferedVmaFlags(MemoryUsage usage) {
return usage != MemoryUsage::DeviceLocal ? VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
- : VkMemoryPropertyFlags{};
+ : VkMemoryPropertyFlagBits{};
}
[[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) {
@@ -239,8 +239,10 @@ vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
.preferredFlags = 0,
+ .memoryTypeBits = 0,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
+ .priority = 0.f,
};
VkImage handle{};
@@ -259,8 +261,10 @@ vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsa
.usage = MemoryUsageVma(usage),
.requiredFlags = MemoryUsageRequiredVmaFlags(usage),
.preferredFlags = MemoryUsagePreferedVmaFlags(usage),
+ .memoryTypeBits = 0,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
+ .priority = 0.f,
};
VkBuffer handle{};
--
cgit v1.2.3
From 346c253cd2397ac152fd10f6b99d6af79349a77f Mon Sep 17 00:00:00 2001
From: lat9nq
Date: Thu, 15 Jun 2023 16:16:36 -0400
Subject: video_core: Formalize HasBrokenCompute
Also limits it to only affected Intel proprietary driver versions.
vulkan_device: Move broken compute determination
vk_device: Remove errant back quote
---
.../renderer_vulkan/vk_pipeline_cache.cpp | 5 +----
src/video_core/vulkan_common/vulkan_device.cpp | 2 ++
src/video_core/vulkan_common/vulkan_device.h | 23 ++++++++++++++++++++++
3 files changed, 26 insertions(+), 4 deletions(-)
(limited to 'src/video_core')
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 18e040a1b..ee2c33131 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -705,10 +705,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline(
std::unique_ptr PipelineCache::CreateComputePipeline(
ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
PipelineStatistics* statistics, bool build_in_parallel) try {
- // TODO: Remove this when Intel fixes their shader compiler.
- // https://github.com/IGCIT/Intel-GPU-Community-Issue-Tracker-IGCIT/issues/159
- if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS &&
- !Settings::values.enable_compute_pipelines.GetValue()) {
+ if (device.HasBrokenCompute() && !Settings::values.enable_compute_pipelines.GetValue()) {
LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash());
return nullptr;
}
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index dcedf4425..e38e34bc8 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -562,6 +562,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
cant_blit_msaa = true;
}
+ has_broken_compute =
+ CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion);
if (is_intel_anv || (is_qualcomm && !is_s8gen2)) {
LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format");
must_emulate_bgr565 = true;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 8c7e44fcb..e54828088 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -10,6 +10,7 @@
#include
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "common/settings.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -518,6 +519,11 @@ public:
return has_renderdoc || has_nsight_graphics || Settings::values.renderer_debug.GetValue();
}
+ /// @returns True if compute pipelines can cause crashing.
+ bool HasBrokenCompute() const {
+ return has_broken_compute;
+ }
+
/// Returns true when the device does not properly support cube compatibility.
bool HasBrokenCubeImageCompability() const {
return has_broken_cube_compatibility;
@@ -579,6 +585,22 @@ public:
return supports_conditional_barriers;
}
+ [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id,
+ u32 driver_version) {
+ if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
+ const u32 major = VK_API_VERSION_MAJOR(driver_version);
+ const u32 minor = VK_API_VERSION_MINOR(driver_version);
+ const u32 patch = VK_API_VERSION_PATCH(driver_version);
+ if (major == 0 && minor == 405 && patch < 286) {
+ LOG_WARNING(
+ Render_Vulkan,
+ "Intel proprietary drivers 0.405.0 until 0.405.286 have broken compute");
+ return true;
+ }
+ }
+ return {};
+ }
+
private:
/// Checks if the physical device is suitable and configures the object state
/// with all necessary info about its properties.
@@ -672,6 +694,7 @@ private:
bool is_integrated{}; ///< Is GPU an iGPU.
bool is_virtual{}; ///< Is GPU a virtual GPU.
bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
+ bool has_broken_compute{}; ///< Compute shaders can cause crashes
bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit
bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
--
cgit v1.2.3
From 711190bb6708f822a3bdb7afd30168d2cc3ed0e4 Mon Sep 17 00:00:00 2001
From: Kelebek1
Date: Mon, 19 Jun 2023 00:19:28 +0100
Subject: Use current GPU address when unmapping GPU pages, not the base
---
src/video_core/memory_manager.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'src/video_core')
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 7b2cde7a7..b2f7e160a 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -111,7 +111,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
[[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr);
SetEntry(current_gpu_addr, entry_type);
if (current_entry_type != entry_type) {
- rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
+ rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size);
}
if constexpr (entry_type == EntryType::Mapped) {
const VAddr current_cpu_addr = cpu_addr + offset;
@@ -134,7 +134,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
[[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr);
SetEntry(current_gpu_addr, entry_type);
if (current_entry_type != entry_type) {
- rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
+ rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size);
}
if constexpr (entry_type == EntryType::Mapped) {
const VAddr current_cpu_addr = cpu_addr + offset;
--
cgit v1.2.3
From 197e13d93d6740cda589d88804262d6bdd176a74 Mon Sep 17 00:00:00 2001
From: lat9nq
Date: Mon, 19 Jun 2023 17:33:30 -0400
Subject: video_core: Check broken compute earlier
Checks it as the system is determining what settings to enable. Reduces
the need to check settings while the system is running.
---
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +-
src/video_core/vulkan_common/vulkan_device.cpp | 3 ++-
2 files changed, 3 insertions(+), 2 deletions(-)
(limited to 'src/video_core')
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index ee2c33131..a2cfb2105 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -705,7 +705,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline(
std::unique_ptr PipelineCache::CreateComputePipeline(
ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
PipelineStatistics* statistics, bool build_in_parallel) try {
- if (device.HasBrokenCompute() && !Settings::values.enable_compute_pipelines.GetValue()) {
+ if (device.HasBrokenCompute()) {
LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash());
return nullptr;
}
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index e38e34bc8..fa9cde75b 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -563,7 +563,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
cant_blit_msaa = true;
}
has_broken_compute =
- CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion);
+ CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) &&
+ !Settings::values.enable_compute_pipelines.GetValue();
if (is_intel_anv || (is_qualcomm && !is_s8gen2)) {
LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format");
must_emulate_bgr565 = true;
--
cgit v1.2.3
From 78ff2862f6a0785247d3aa64bdc210b545e4d82d Mon Sep 17 00:00:00 2001
From: toast2903
Date: Sun, 18 Jun 2023 23:29:27 -0400
Subject: vulkan_device: Remove brace initializer
Co-authored-by: Tobias
---
src/video_core/vulkan_common/vulkan_device.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'src/video_core')
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index e54828088..0b634a876 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -598,7 +598,7 @@ public:
return true;
}
}
- return {};
+ return false;
}
private:
--
cgit v1.2.3
From 5da70f719703084482933e103e561cc98163f370 Mon Sep 17 00:00:00 2001
From: Kelebek1
Date: Tue, 23 May 2023 14:45:54 +0100
Subject: Remove memory allocations in some hot paths
---
src/video_core/buffer_cache/buffer_cache.h | 4 +-
src/video_core/buffer_cache/buffer_cache_base.h | 4 +-
src/video_core/cdma_pusher.h | 1 -
src/video_core/dma_pusher.h | 8 ++--
src/video_core/engines/maxwell_dma.cpp | 35 ++++++++--------
src/video_core/host1x/codecs/h264.cpp | 4 +-
src/video_core/memory_manager.cpp | 13 +++---
src/video_core/memory_manager.h | 15 ++++---
src/video_core/renderer_opengl/gl_shader_cache.cpp | 4 +-
src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 2 +-
.../renderer_vulkan/vk_pipeline_cache.cpp | 10 ++++-
.../renderer_vulkan/vk_texture_cache.cpp | 27 ++++++------
src/video_core/shader_cache.cpp | 4 +-
src/video_core/texture_cache/image_base.h | 5 ++-
src/video_core/texture_cache/texture_cache.h | 14 +++----
src/video_core/texture_cache/texture_cache_base.h | 4 +-
src/video_core/texture_cache/util.cpp | 48 ++++++++++++----------
src/video_core/texture_cache/util.h | 31 +++++++-------
src/video_core/transform_feedback.cpp | 8 ++--
src/video_core/transform_feedback.h | 2 +-
src/video_core/vulkan_common/vulkan_device.cpp | 1 +
21 files changed, 137 insertions(+), 107 deletions(-)
(limited to 'src/video_core')
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 45977d578..58a45ab67 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -207,7 +207,7 @@ bool BufferCache::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
if (has_new_downloads) {
memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
}
- tmp_buffer.resize(amount);
+ tmp_buffer.resize_destructive(amount);
cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
return true;
@@ -1279,7 +1279,7 @@ template
typename BufferCache::OverlapResult BufferCache
::ResolveOverlaps(VAddr cpu_addr,
u32 wanted_size) {
static constexpr int STREAM_LEAP_THRESHOLD = 16;
- std::vector overlap_ids;
+ boost::container::small_vector overlap_ids;
VAddr begin = cpu_addr;
VAddr end = cpu_addr + wanted_size;
int stream_score = 0;
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 63a120f7a..fe6068cfe 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -229,7 +229,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches;
struct OverlapResult {
- std::vector ids;
+ boost::container::small_vector ids;
VAddr begin;
VAddr end;
bool has_stream_leap = false;
@@ -582,7 +582,7 @@ private:
BufferId inline_buffer_id;
std::array> CACHING_PAGEBITS)> page_table;
- std::vector tmp_buffer;
+ Common::ScratchBuffer tmp_buffer;
};
} // namespace VideoCommon
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 83112dfce..7d660af47 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -63,7 +63,6 @@ struct ChCommand {
};
using ChCommandHeaderList = std::vector;
-using ChCommandList = std::vector;
struct ThiRegisters {
u32_le increment_syncpt{};
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 1cdb690ed..8a2784cdc 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -6,6 +6,7 @@
#include
#include
#include
+#include
#include
#include "common/bit_field.h"
@@ -102,11 +103,12 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub
struct CommandList final {
CommandList() = default;
explicit CommandList(std::size_t size) : command_lists(size) {}
- explicit CommandList(std::vector&& prefetch_command_list_)
+ explicit CommandList(
+ boost::container::small_vector&& prefetch_command_list_)
: prefetch_command_list{std::move(prefetch_command_list_)} {}
- std::vector command_lists;
- std::vector prefetch_command_list;
+ boost::container::small_vector command_lists;
+ boost::container::small_vector prefetch_command_list;
};
/**
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ebe5536de..bc1eb41e7 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -108,9 +108,11 @@ void MaxwellDMA::Launch() {
if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
ASSERT(regs.remap_const.component_size_minus_one == 3);
accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
- std::vector tmp_buffer(regs.line_length_in, regs.remap_consta_value);
+ read_buffer.resize_destructive(regs.line_length_in * sizeof(u32));
+ std::span span(reinterpret_cast(read_buffer.data()), regs.line_length_in);
+ std::ranges::fill(span, regs.remap_consta_value);
memory_manager.WriteBlockUnsafe(regs.offset_out,
- reinterpret_cast(tmp_buffer.data()),
+ reinterpret_cast(read_buffer.data()),
regs.line_length_in * sizeof(u32));
} else {
memory_manager.FlushCaching();
@@ -126,32 +128,32 @@ void MaxwellDMA::Launch() {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
- std::vector tmp_buffer(16);
+ read_buffer.resize_destructive(16);
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
memory_manager.ReadBlockUnsafe(
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
- tmp_buffer.data(), tmp_buffer.size());
- memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
- tmp_buffer.size());
+ read_buffer.data(), read_buffer.size());
+ memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(),
+ read_buffer.size());
}
} else if (is_src_pitch && !is_dst_pitch) {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
- std::vector tmp_buffer(16);
+ read_buffer.resize_destructive(16);
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
- memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
- tmp_buffer.size());
+ memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(),
+ read_buffer.size());
memory_manager.WriteBlockCached(
convert_linear_2_blocklinear_addr(regs.offset_out + offset),
- tmp_buffer.data(), tmp_buffer.size());
+ read_buffer.data(), read_buffer.size());
}
} else {
if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
- std::vector tmp_buffer(regs.line_length_in);
- memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+ read_buffer.resize_destructive(regs.line_length_in);
+ memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(),
regs.line_length_in);
- memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
+ memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(),
regs.line_length_in);
}
}
@@ -171,7 +173,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
src_operand.address = regs.offset_in;
DMA::BufferOperand dst_operand;
- dst_operand.pitch = regs.pitch_out;
+ u32 abs_pitch_out = std::abs(static_cast(regs.pitch_out));
+ dst_operand.pitch = abs_pitch_out;
dst_operand.width = regs.line_length_in;
dst_operand.height = regs.line_count;
dst_operand.address = regs.offset_out;
@@ -218,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
const size_t src_size =
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
- const size_t dst_size = static_cast(regs.pitch_out) * regs.line_count;
+ const size_t dst_size = static_cast(abs_pitch_out) * regs.line_count;
read_buffer.resize_destructive(src_size);
write_buffer.resize_destructive(dst_size);
@@ -227,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
- regs.pitch_out);
+ abs_pitch_out);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index 6ce179167..ce827eb6c 100644
--- a/src/video_core/host1x/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -4,6 +4,7 @@
#include
#include
+#include "common/scratch_buffer.h"
#include "common/settings.h"
#include "video_core/host1x/codecs/h264.h"
#include "video_core/host1x/host1x.h"
@@ -188,7 +189,8 @@ void H264BitWriter::WriteBit(bool state) {
}
void H264BitWriter::WriteScalingList(std::span list, s32 start, s32 count) {
- std::vector scan(count);
+ static Common::ScratchBuffer scan{};
+ scan.resize_destructive(count);
if (count == 16) {
std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
} else {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index b2f7e160a..45141e488 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -587,7 +587,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
VideoCommon::CacheType which) {
- std::vector tmp_buffer(size);
+ tmp_buffer.resize_destructive(size);
ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which);
// The output block must be flushed in case it has data modified from the GPU.
@@ -670,9 +670,9 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
return result;
}
-std::vector> MemoryManager::GetSubmappedRange(
- GPUVAddr gpu_addr, std::size_t size) const {
- std::vector> result{};
+boost::container::small_vector, 32>
+MemoryManager::GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const {
+ boost::container::small_vector, 32> result{};
GetSubmappedRangeImpl(gpu_addr, size, result);
return result;
}
@@ -680,8 +680,9 @@ std::vector> MemoryManager::GetSubmappedRange(
template
void MemoryManager::GetSubmappedRangeImpl(
GPUVAddr gpu_addr, std::size_t size,
- std::vector, std::size_t>>&
- result) const {
+ boost::container::small_vector<
+ std::pair, std::size_t>, 32>& result)
+ const {
std::optional, std::size_t>>
last_segment{};
std::optional old_page_addr{};
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 794535122..4202c26ff 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -8,10 +8,12 @@
#include
#include
#include
+#include
#include "common/common_types.h"
#include "common/multi_level_page_table.h"
#include "common/range_map.h"
+#include "common/scratch_buffer.h"
#include "common/virtual_buffer.h"
#include "video_core/cache_types.h"
#include "video_core/pte_kind.h"
@@ -107,8 +109,8 @@ public:
* if the region is continuous, a single pair will be returned. If it's unmapped, an empty
* vector will be returned;
*/
- std::vector> GetSubmappedRange(GPUVAddr gpu_addr,
- std::size_t size) const;
+ boost::container::small_vector, 32> GetSubmappedRange(
+ GPUVAddr gpu_addr, std::size_t size) const;
GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
@@ -165,7 +167,8 @@ private:
template
void GetSubmappedRangeImpl(
GPUVAddr gpu_addr, std::size_t size,
- std::vector, std::size_t>>&
+ boost::container::small_vector<
+ std::pair, std::size_t>, 32>&
result) const;
Core::System& system;
@@ -215,8 +218,8 @@ private:
Common::VirtualBuffer big_page_table_cpu;
std::vector big_page_continuous;
- std::vector> page_stash{};
- std::vector> page_stash2{};
+ boost::container::small_vector, 32> page_stash{};
+ boost::container::small_vector, 32> page_stash2{};
mutable std::mutex guard;
@@ -226,6 +229,8 @@ private:
std::unique_ptr accumulator;
static std::atomic unique_identifier_generator;
+
+ Common::ScratchBuffer tmp_buffer;
};
} // namespace Tegra
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3f077311e..0329ed820 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
case Shader::Stage::VertexB:
case Shader::Stage::Geometry:
if (!use_assembly_shaders && key.xfb_enabled != 0) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+ auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
break;
case Shader::Stage::TessellationEval:
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index e30fcb1ed..f47301ad5 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -361,7 +361,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
- boost::container::small_vector vk_copies(copies.size());
+ boost::container::small_vector vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a2cfb2105..9f316113c 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -167,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span program
info.fixed_state_point_size = point_size;
}
if (key.state.xfb_enabled) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ auto [varyings, count] =
+ VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
info.convert_depth_mode = gl_ndc;
}
@@ -214,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span program
info.fixed_state_point_size = point_size;
}
if (key.state.xfb_enabled != 0) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ auto [varyings, count] =
+ VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
info.convert_depth_mode = gl_ndc;
break;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index f025f618b..f3cef09dd 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -330,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) {
};
}
-[[maybe_unused]] [[nodiscard]] std::vector TransformBufferCopies(
- std::span copies, size_t buffer_offset) {
- std::vector result(copies.size());
+[[maybe_unused]] [[nodiscard]] boost::container::small_vector
+TransformBufferCopies(std::span copies, size_t buffer_offset) {
+ boost::container::small_vector result(copies.size());
std::ranges::transform(
copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) {
return VkBufferCopy{
@@ -344,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) {
return result;
}
-[[nodiscard]] std::vector TransformBufferImageCopies(
+[[nodiscard]] boost::container::small_vector TransformBufferImageCopies(
std::span copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) {
struct Maker {
VkBufferImageCopy operator()(const BufferImageCopy& copy) const {
@@ -377,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) {
VkImageAspectFlags aspect_mask;
};
if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
- std::vector result(copies.size() * 2);
+ boost::container::small_vector result(copies.size() * 2);
std::ranges::transform(copies, result.begin(),
Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT});
std::ranges::transform(copies, result.begin() + copies.size(),
Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT});
return result;
} else {
- std::vector result(copies.size());
+ boost::container::small_vector result(copies.size());
std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask});
return result;
}
@@ -867,8 +867,8 @@ void TextureCacheRuntime::BarrierFeedbackLoop() {
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
std::span copies) {
- std::vector vk_in_copies(copies.size());
- std::vector vk_out_copies(copies.size());
+ boost::container::small_vector vk_in_copies(copies.size());
+ boost::container::small_vector vk_out_copies(copies.size());
const VkImageAspectFlags src_aspect_mask = src.AspectMask();
const VkImageAspectFlags dst_aspect_mask = dst.AspectMask();
@@ -1157,7 +1157,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
std::span copies) {
- std::vector vk_copies(copies.size());
+ boost::container::small_vector vk_copies(copies.size());
const VkImageAspectFlags aspect_mask = dst.AspectMask();
ASSERT(aspect_mask == src.AspectMask());
@@ -1332,7 +1332,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
ScaleDown(true);
}
scheduler->RequestOutsideRenderPassOperationContext();
- std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
+ auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
const VkBuffer src_buffer = buffer;
const VkImage vk_image = *original_image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
@@ -1367,8 +1367,9 @@ void Image::DownloadMemory(std::span buffers_span, std::span buffers_vector{};
- boost::container::small_vector, 1> vk_copies;
+ boost::container::small_vector buffers_vector{};
+ boost::container::small_vector, 8>
+ vk_copies;
for (size_t index = 0; index < buffers_span.size(); index++) {
buffers_vector.emplace_back(buffers_span[index]);
vk_copies.emplace_back(
@@ -1858,7 +1859,7 @@ Framebuffer::~Framebuffer() = default;
void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
std::span color_buffers,
ImageView* depth_buffer, bool is_rescaled) {
- std::vector attachments;
+ boost::container::small_vector attachments;
RenderPassKey renderpass_key{};
s32 num_layers = 1;
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index c5213875b..4db948b6d 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -151,11 +151,9 @@ void ShaderCache::RemovePendingShaders() {
marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
marked_for_removal.end());
- std::vector removed_shaders;
- removed_shaders.reserve(marked_for_removal.size());
+ boost::container::small_vector removed_shaders;
std::scoped_lock lock{lookup_mutex};
-
for (Entry* const entry : marked_for_removal) {
removed_shaders.push_back(entry->data);
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 1b8a17ee8..55d49d017 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -6,6 +6,7 @@
#include
#include
#include
+#include
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -108,8 +109,8 @@ struct ImageBase {
std::vector image_view_infos;
std::vector image_view_ids;
- std::vector slice_offsets;
- std::vector slice_subresources;
+ boost::container::small_vector slice_offsets;
+ boost::container::small_vector slice_subresources;
std::vector aliased_images;
std::vector overlapping_images;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d58bb69ff..d3f03a995 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -526,7 +526,7 @@ void TextureCache::WriteMemory(VAddr cpu_addr, size_t size) {
template
void TextureCache::DownloadMemory(VAddr cpu_addr, size_t size) {
- std::vector images;
+ boost::container::small_vector images;
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
if (!image.IsSafeDownload()) {
return;
@@ -579,7 +579,7 @@ std::optional TextureCache::GetFlushArea(V
template
void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
- std::vector deleted_images;
+ boost::container::small_vector deleted_images;
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
@@ -593,7 +593,7 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
template
void TextureCache::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
- std::vector deleted_images;
+ boost::container::small_vector deleted_images;
ForEachImageInRegionGPU(as_id, gpu_addr, size,
[&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
@@ -1101,7 +1101,7 @@ ImageId TextureCache::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
const bool native_bgr = runtime.HasNativeBgr();
const bool flexible_formats = True(options & RelaxedOptions::Format);
ImageId image_id{};
- boost::container::small_vector image_ids;
+ boost::container::small_vector image_ids;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
return false;
@@ -1622,7 +1622,7 @@ ImageId TextureCache::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr)
}
}
ImageId image_id{};
- boost::container::small_vector image_ids;
+ boost::container::small_vector image_ids;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
return false;
@@ -1942,7 +1942,7 @@ void TextureCache::RegisterImage(ImageId image_id) {
image.map_view_id = map_id;
return;
}
- std::vector sparse_maps{};
+ boost::container::small_vector sparse_maps;
ForEachSparseSegment(
image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
@@ -2217,7 +2217,7 @@ void TextureCache::MarkModification(ImageBase& image) noexcept {
template
void TextureCache::SynchronizeAliases(ImageId image_id) {
- boost::container::small_vector aliased_images;
+ boost::container::small_vector aliased_images;
Image& image = slot_images[image_id];
bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
bool any_modified = True(image.flags & ImageFlagBits::GpuModified);
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 44232b961..e9ec91265 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -56,7 +56,7 @@ struct ImageViewInOut {
struct AsyncDecodeContext {
ImageId image_id;
Common::ScratchBuffer decoded_data;
- std::vector copies;
+ boost::container::small_vector copies;
std::mutex mutex;
std::atomic_bool complete;
};
@@ -429,7 +429,7 @@ private:
std::unordered_map, Common::IdentityHash