From b178c9a3492ea6c0db63f708beecd3dfb3d921fe Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 15 Apr 2020 22:10:40 -0400 Subject: decoder/image: Fix incorrect G24R8 component sizes in GetComponentSize() The components' sizes were mismatched. This corrects that. --- src/video_core/shader/decode/image.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 0dd7a1196..7f94dacc8 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -201,10 +201,10 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 0; case TextureFormat::G24R8: if (component == 0) { - return 8; + return 24; } if (component == 1) { - return 24; + return 8; } return 0; case TextureFormat::G8R8: -- cgit v1.2.3 From 24620bc4ea9ca59a757b7f07ca912f6645c5b8ef Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 15 Apr 2020 22:26:47 -0400 Subject: decode/image: Fix typo in assert in GetComponentSize() --- src/video_core/shader/decode/image.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 7f94dacc8..08ebca38b 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -119,7 +119,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, } break; } - UNIMPLEMENTED_MSG("texture format not implement={}", format); + UNIMPLEMENTED_MSG("Texture format not implemented={}", format); return ComponentType::FLOAT; } @@ -212,7 +212,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { case TextureFormat::G4R4: return (component == 0 || component == 1) ? 
4 : 0; default: - UNIMPLEMENTED_MSG("texture format not implement={}", format); + UNIMPLEMENTED_MSG("Texture format not implemented={}", format); return 0; } } @@ -249,7 +249,7 @@ std::size_t GetImageComponentMask(TextureFormat format) { case TextureFormat::R1: return std::size_t{R}; default: - UNIMPLEMENTED_MSG("texture format not implement={}", format); + UNIMPLEMENTED_MSG("Texture format not implemented={}", format); return std::size_t{R | G | B | A}; } } -- cgit v1.2.3 From 29a0ca23918092d252f440b2f55f68bb3c991366 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 29 Jun 2020 02:34:17 -0300 Subject: renderer_vulkan: Create a Vulkan 1.0 instance when 1.1 is not available This commit doesn't make yuzu compatible with Vulkan 1.0 yet; it only creates a 1.0 instance. --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 5 ++++- src/video_core/renderer_vulkan/wrapper.cpp | 23 ++++++++++++++++++---- src/video_core/renderer_vulkan/wrapper.h | 4 +++- 3 files changed, 26 insertions(+), 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 6e49699d0..6f9eadbeb 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -180,7 +180,10 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc } } - vk::Instance instance = vk::Instance::Create(layers, extensions, dld); + // Limit the maximum version of Vulkan to avoid using untested version. 
+ const u32 version = std::min(vk::AvailableVersion(dld), static_cast(VK_API_VERSION_1_1)); + + vk::Instance instance = vk::Instance::Create(version, layers, extensions, dld); if (!instance) { LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); return {}; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 013865aa4..56055af1b 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -10,6 +10,7 @@ #include #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -375,18 +376,17 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span buffe return VK_SUCCESS; } -Instance Instance::Create(Span layers, Span extensions, +Instance Instance::Create(u32 version, Span layers, Span extensions, InstanceDispatch& dld) noexcept { - static constexpr VkApplicationInfo application_info{ + const VkApplicationInfo application_info{ .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .pNext = nullptr, .pApplicationName = "yuzu Emulator", .applicationVersion = VK_MAKE_VERSION(0, 1, 0), .pEngineName = "yuzu Emulator", .engineVersion = VK_MAKE_VERSION(0, 1, 0), - .apiVersion = VK_API_VERSION_1_1, + .apiVersion = version, }; - const VkInstanceCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pNext = nullptr, @@ -775,6 +775,21 @@ VkPhysicalDeviceMemoryProperties PhysicalDevice::GetMemoryProperties() const noe return properties; } +u32 AvailableVersion(const InstanceDispatch& dld) noexcept { + PFN_vkEnumerateInstanceVersion vkEnumerateInstanceVersion; + if (!Proc(vkEnumerateInstanceVersion, dld, "vkEnumerateInstanceVersion")) { + // If the procedure is not found, Vulkan 1.0 is assumed + return VK_API_VERSION_1_0; + } + u32 version; + if (const VkResult result = vkEnumerateInstanceVersion(&version); result != VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "vkEnumerateInstanceVersion returned {}, assuming Vulkan 
1.1", + ToString(result)); + return VK_API_VERSION_1_1; + } + return version; +} + std::optional> EnumerateInstanceExtensionProperties( const InstanceDispatch& dld) { u32 num; diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index b9d3fedc1..748a94d2f 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -563,7 +563,7 @@ class Instance : public Handle { public: /// Creates a Vulkan instance. Use "operator bool" for error handling. - static Instance Create(Span layers, Span extensions, + static Instance Create(u32 version, Span layers, Span extensions, InstanceDispatch& dld) noexcept; /// Enumerates physical devices. @@ -1048,6 +1048,8 @@ private: const DeviceDispatch* dld; }; +u32 AvailableVersion(const InstanceDispatch& dld) noexcept; + std::optional> EnumerateInstanceExtensionProperties( const InstanceDispatch& dld); -- cgit v1.2.3 From c5a78f4480369ad6325c51549509361c10d2cea5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 29 Jun 2020 02:48:29 -0300 Subject: vk_device: Use Vulkan 1.0 properly Enable the required capabilities to use Vulkan 1.0 without validation errors and disable those that are not compatible with it. 
--- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 19 +++--- src/video_core/renderer_vulkan/renderer_vulkan.h | 2 + src/video_core/renderer_vulkan/vk_device.cpp | 73 +++++++++++----------- src/video_core/renderer_vulkan/vk_device.h | 12 +++- .../renderer_vulkan/vk_shader_decompiler.cpp | 12 +++- 5 files changed, 66 insertions(+), 52 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 6f9eadbeb..7ffc90cd0 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -92,9 +92,9 @@ Common::DynamicLibrary OpenVulkanLibrary() { return library; } -vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatch& dld, - WindowSystemType window_type = WindowSystemType::Headless, - bool enable_layers = false) { +std::pair CreateInstance( + Common::DynamicLibrary& library, vk::InstanceDispatch& dld, + WindowSystemType window_type = WindowSystemType::Headless, bool enable_layers = false) { if (!library.IsOpen()) { LOG_ERROR(Render_Vulkan, "Vulkan library not available"); return {}; @@ -191,7 +191,7 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc if (!vk::Load(*instance, dld)) { LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); } - return instance; + return std::make_pair(std::move(instance), version); } std::string GetReadableVersion(u32 version) { @@ -289,8 +289,8 @@ bool RendererVulkan::TryPresent(int /*timeout_ms*/) { bool RendererVulkan::Init() { library = OpenVulkanLibrary(); - instance = CreateInstance(library, dld, render_window.GetWindowInfo().type, - Settings::values.renderer_debug); + std::tie(instance, instance_version) = CreateInstance( + library, dld, render_window.GetWindowInfo().type, Settings::values.renderer_debug); if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { 
return false; } @@ -423,7 +423,8 @@ bool RendererVulkan::PickDevices() { return false; } - device = std::make_unique(*instance, physical_device, *surface, dld); + device = + std::make_unique(*instance, instance_version, physical_device, *surface, dld); return device->Create(); } @@ -433,7 +434,7 @@ void RendererVulkan::Report() const { const std::string driver_version = GetDriverVersion(*device); const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version); - const std::string api_version = GetReadableVersion(device->GetApiVersion()); + const std::string api_version = GetReadableVersion(device->ApiVersion()); const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions()); @@ -453,7 +454,7 @@ void RendererVulkan::Report() const { std::vector RendererVulkan::EnumerateDevices() { vk::InstanceDispatch dld; Common::DynamicLibrary library = OpenVulkanLibrary(); - vk::Instance instance = CreateInstance(library, dld); + vk::Instance instance = CreateInstance(library, dld).first; if (!instance) { return {}; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 522b5bff8..9617a93e9 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -63,6 +63,8 @@ private: vk::InstanceDispatch dld; vk::Instance instance; + u32 instance_version{}; + vk::SurfaceKHR surface; VKScreenInfo screen_info; diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index ebcfaa0e3..90916ee0e 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -38,6 +38,9 @@ constexpr std::array Depth16UnormS8_UINT{ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_SWAPCHAIN_EXTENSION_NAME, + VK_KHR_MAINTENANCE1_EXTENSION_NAME, + VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, + VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, 
VK_KHR_16BIT_STORAGE_EXTENSION_NAME, VK_KHR_8BIT_STORAGE_EXTENSION_NAME, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, @@ -171,10 +174,10 @@ std::unordered_map GetFormatProperties( } // Anonymous namespace -VKDevice::VKDevice(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, - const vk::InstanceDispatch& dld) - : dld{dld}, physical{physical}, properties{physical.GetProperties()}, - format_properties{GetFormatProperties(physical, dld)} { +VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_, + VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) + : dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, + instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} { SetupFamilies(surface); SetupFeatures(); } @@ -565,20 +568,6 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { std::vector VKDevice::LoadExtensions() { std::vector extensions; - const auto Test = [&](const VkExtensionProperties& extension, - std::optional> status, const char* name, - bool push) { - if (extension.extensionName != std::string_view(name)) { - return; - } - if (push) { - extensions.push_back(name); - } - if (status) { - status->get() = true; - } - }; - extensions.reserve(7 + REQUIRED_EXTENSIONS.size()); extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); @@ -587,28 +576,36 @@ std::vector VKDevice::LoadExtensions() { bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; - for (const auto& extension : physical.EnumerateDeviceExtensionProperties()) { - Test(extension, nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); - Test(extension, khr_uniform_buffer_standard_layout, + for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { + const auto test = [&](std::optional> status, const char* name, + bool push) { + 
if (extension.extensionName != std::string_view(name)) { + return; + } + if (push) { + extensions.push_back(name); + } + if (status) { + status->get() = true; + } + }; + test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); + test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); - Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, - false); - Test(extension, ext_depth_range_unrestricted, - VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); - Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); - Test(extension, ext_shader_viewport_index_layer, - VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); - Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, - false); - Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, - false); - Test(extension, has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, - false); - Test(extension, has_ext_extended_dynamic_state, - VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); + test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); + test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); + test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); + test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, + true); + test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); + test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); + test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); + if (instance_version >= VK_API_VERSION_1_1) { + test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + } if (Settings::values.renderer_debug) { - Test(extension, 
nv_device_diagnostics_config, - VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); + test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, + true); } } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 26a233db1..4286673d9 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -24,8 +24,8 @@ const u32 GuestWarpSize = 32; /// Handles data specific to a physical device. class VKDevice final { public: - explicit VKDevice(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, - const vk::InstanceDispatch& dld); + explicit VKDevice(VkInstance instance, u32 instance_version, vk::PhysicalDevice physical, + VkSurfaceKHR surface, const vk::InstanceDispatch& dld); ~VKDevice(); /// Initializes the device. Returns true on success. @@ -82,8 +82,13 @@ public: return present_family; } + /// Returns the current instance Vulkan API version in Vulkan-formatted version numbers. + u32 InstanceApiVersion() const { + return instance_version; + } + /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. - u32 GetApiVersion() const { + u32 ApiVersion() const { return properties.apiVersion; } @@ -239,6 +244,7 @@ private: vk::Device logical; ///< Logical device. vk::Queue graphics_queue; ///< Main graphics queue. vk::Queue present_queue; ///< Main present queue. + u32 instance_version{}; ///< Vulkan instance version. u32 graphics_family{}; ///< Main graphics queue family index. u32 present_family{}; ///< Main present queue family index. VkDriverIdKHR driver_id{}; ///< Driver ID. 
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index cd7d7a4e4..a20452b87 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -272,12 +272,19 @@ bool IsPrecise(Operation operand) { return false; } +u32 ShaderVersion(const VKDevice& device) { + if (device.InstanceApiVersion() < VK_API_VERSION_1_1) { + return 0x00010000; + } + return 0x00010300; +} + class SPIRVDecompiler final : public Sirit::Module { public: explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, const Registry& registry, const Specialization& specialization) - : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, - registry{registry}, specialization{specialization} { + : Module(ShaderVersion(device)), device{device}, ir{ir}, stage{stage}, + header{ir.GetHeader()}, registry{registry}, specialization{specialization} { if (stage != ShaderType::Compute) { transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); } @@ -293,6 +300,7 @@ public: AddCapability(spv::Capability::DrawParameters); AddCapability(spv::Capability::SubgroupBallotKHR); AddCapability(spv::Capability::SubgroupVoteKHR); + AddExtension("SPV_KHR_16bit_storage"); AddExtension("SPV_KHR_shader_ballot"); AddExtension("SPV_KHR_subgroup_vote"); AddExtension("SPV_KHR_storage_buffer_storage_class"); -- cgit v1.2.3 From ffeb4ef83e731bb54a82080749ca22a263466788 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 23 Sep 2020 15:06:21 -0400 Subject: shader/registry: Make use of designated initializers where applicable Same behavior, less repetition. 
--- src/video_core/shader/registry.cpp | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index cdf274e54..de9a3df90 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -24,31 +24,33 @@ GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterfac if (shader_stage == ShaderType::Compute) { return {}; } - auto& graphics = static_cast(engine); - - GraphicsInfo info; - info.tfb_layouts = graphics.regs.tfb_layouts; - info.tfb_varying_locs = graphics.regs.tfb_varying_locs; - info.primitive_topology = graphics.regs.draw.topology; - info.tessellation_primitive = graphics.regs.tess_mode.prim; - info.tessellation_spacing = graphics.regs.tess_mode.spacing; - info.tfb_enabled = graphics.regs.tfb_enabled; - info.tessellation_clockwise = graphics.regs.tess_mode.cw; - return info; + + auto& graphics = dynamic_cast(engine); + + return { + .tfb_layouts = graphics.regs.tfb_layouts, + .tfb_varying_locs = graphics.regs.tfb_varying_locs, + .primitive_topology = graphics.regs.draw.topology, + .tessellation_primitive = graphics.regs.tess_mode.prim, + .tessellation_spacing = graphics.regs.tess_mode.spacing, + .tfb_enabled = graphics.regs.tfb_enabled != 0, + .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0, + }; } ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { if (shader_stage != ShaderType::Compute) { return {}; } - auto& compute = static_cast(engine); + + auto& compute = dynamic_cast(engine); const auto& launch = compute.launch_description; - ComputeInfo info; - info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}; - info.local_memory_size_in_words = launch.local_pos_alloc; - info.shared_memory_size_in_words = launch.shared_alloc; - return info; + return { + .workgroup_size = 
{launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}, + .shared_memory_size_in_words = launch.shared_alloc, + .local_memory_size_in_words = launch.local_pos_alloc, + }; } } // Anonymous namespace -- cgit v1.2.3 From cd6f4f7eed24d8562fbc8daec424e5a816ce6233 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 23 Sep 2020 15:08:31 -0400 Subject: shader/registry: Remove unnecessary namespace qualifiers Using statements already make these unnecessary. --- src/video_core/shader/registry.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index de9a3df90..3cf922002 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -55,12 +55,11 @@ ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& } // Anonymous namespace -Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info) +Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} -Registry::Registry(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine) +Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine) : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()}, graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo( shader_stage, engine)} {} @@ -115,8 +114,7 @@ std::optional Registry::ObtainSeparateSampler return value; } -std::optional Registry::ObtainBindlessSampler(u32 buffer, - u32 offset) { +std::optional Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { const std::pair key = {buffer, offset}; const auto iter = bindless_samplers.find(key); if (iter != 
bindless_samplers.end()) { -- cgit v1.2.3 From 77532ebde3be78aa9a5471c496784d0151453289 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 23 Sep 2020 15:10:25 -0400 Subject: shader/registry: Silence a -Wshadow warning --- src/video_core/shader/registry.cpp | 8 ++++---- src/video_core/shader/registry.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index 3cf922002..148d91fcb 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -59,10 +59,10 @@ Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} -Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine) - : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()}, - graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo( - shader_stage, engine)} {} +Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_) + : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()}, + graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo( + shader_stage, engine_)} {} Registry::~Registry() = default; diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index 231206765..4bebefdde 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -94,7 +94,7 @@ public: explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); explicit Registry(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine); + Tegra::Engines::ConstBufferEngineInterface& engine_); ~Registry(); -- cgit v1.2.3 From 
67af0323f0599585825895144bcfcaea0e10bf46 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Sep 2020 21:38:05 -0300 Subject: video_core: Fix instances where msbuild always regenerated host shaders When HEADER_GENERATOR was included in the DEPENDS section of custom commands, msbuild assumed this was always modified. Changing this file is not common so we can remove it from there. --- src/video_core/host_shaders/CMakeLists.txt | 17 +++++------------ src/video_core/host_shaders/StringShaderHeader.cmake | 2 ++ 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index aa62363a7..c157724a9 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,23 +1,16 @@ -set(SHADER_FILES +set(SHADER_SOURCES opengl_present.frag opengl_present.vert ) set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) -set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) - set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders) -add_custom_command( - OUTPUT - ${SHADER_DIR} - COMMAND - ${CMAKE_COMMAND} -E make_directory ${SHADER_DIR} -) +set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in) set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) -foreach(FILENAME IN ITEMS ${SHADER_FILES}) +foreach(FILENAME IN ITEMS ${SHADER_SOURCES}) string(REPLACE "." 
"_" SHADER_NAME ${FILENAME}) set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) @@ -29,8 +22,8 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) MAIN_DEPENDENCY ${SOURCE_FILE} DEPENDS - ${HEADER_GENERATOR} ${INPUT_FILE} + # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified ) set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE}) endforeach() @@ -39,5 +32,5 @@ add_custom_target(host_shaders DEPENDS ${SHADER_HEADERS} SOURCES - ${SHADER_FILES} + ${SHADER_SOURCES} ) diff --git a/src/video_core/host_shaders/StringShaderHeader.cmake b/src/video_core/host_shaders/StringShaderHeader.cmake index 368bce0ed..c0fc49768 100644 --- a/src/video_core/host_shaders/StringShaderHeader.cmake +++ b/src/video_core/host_shaders/StringShaderHeader.cmake @@ -8,4 +8,6 @@ string(TOUPPER ${CONTENTS_NAME} CONTENTS_NAME) file(READ ${SOURCE_FILE} CONTENTS) +get_filename_component(OUTPUT_DIR ${HEADER_FILE} DIRECTORY) +make_directory(${OUTPUT_DIR}) configure_file(${INPUT_FILE} ${HEADER_FILE} @ONLY) -- cgit v1.2.3 From e3a615a6162460a1eb865a5a78ae9d229a16eb58 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 24 Sep 2020 13:21:22 -0400 Subject: arithmetic_integer_immediate: Make use of std::move where applicable Same behavior, minus any redundant atomic reference count increments and decrements. 
--- .../shader/decode/arithmetic_integer_immediate.cpp | 35 ++++++++++++---------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 73880db0e..2a30aab2b 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -28,23 +28,26 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { case OpCode::Id::IADD32I: { UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); - op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true); + op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); - const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); + Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); - SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); + SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); + SetRegister(bb, instr.gpr0, std::move(value)); break; } case OpCode::Id::LOP32I: { - if (instr.alu.lop32i.invert_a) - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); + if (instr.alu.lop32i.invert_a) { + op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); + } - if (instr.alu.lop32i.invert_b) - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); + if (instr.alu.lop32i.invert_b) { + op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); + } - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, - PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc); + WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a), + std::move(op_b), PredicateResultMode::None, 
Pred::UnusedIndex, + instr.op_32.generates_cc != 0); break; } default: @@ -58,14 +61,14 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, Node op_b, PredicateResultMode predicate_mode, Pred predicate, bool sets_cc) { - const Node result = [&]() { + Node result = [&] { switch (logic_op) { case LogicOperation::And: - return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b); + return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); case LogicOperation::Or: - return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b); + return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); case LogicOperation::Xor: - return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b); + return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); case LogicOperation::PassB: return op_b; default: @@ -84,8 +87,8 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation return; case PredicateResultMode::NotZero: { // Set the predicate to true if the result is not zero. 
- const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0)); - SetPredicate(bb, static_cast(predicate), compare); + Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); + SetPredicate(bb, static_cast(predicate), std::move(compare)); break; } default: -- cgit v1.2.3 From e0f2db437650c33e797bb33ee51c753a3c14fe86 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 25 Sep 2020 00:12:45 -0400 Subject: vk_command_pool: Add missing header guard --- src/video_core/renderer_vulkan/vk_command_pool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h index 3aee239b9..fb98f72fc 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.h +++ b/src/video_core/renderer_vulkan/vk_command_pool.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include #include -- cgit v1.2.3 From 4ed4bba3050584cfe3e31a4bcc694c818c5baf2d Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 25 Sep 2020 00:14:10 -0400 Subject: vk_command_pool: Make use of override on destructor --- src/video_core/renderer_vulkan/vk_command_pool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h index fb98f72fc..92d8a9f4d 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.h +++ b/src/video_core/renderer_vulkan/vk_command_pool.h @@ -18,7 +18,7 @@ class VKDevice; class CommandPool final : public ResourcePool { public: explicit CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device); - virtual ~CommandPool(); + ~CommandPool() override; void Allocate(size_t begin, size_t end) override; -- cgit v1.2.3 From 940d85241bbd1f7fdbd65373e4c80b10025f8b1b Mon Sep 17 00:00:00 2001 From: Lioncash Date: 
Fri, 25 Sep 2020 00:15:50 -0400 Subject: vk_command_pool: Move definition of Pool into the cpp file Allows the implementation details to be changed without recompiling any files that include this header. --- src/video_core/renderer_vulkan/vk_command_pool.cpp | 5 +++++ src/video_core/renderer_vulkan/vk_command_pool.h | 5 +---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp index f1abd4b1a..6339f4fe0 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp @@ -12,6 +12,11 @@ namespace Vulkan { constexpr size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; +struct CommandPool::Pool { + vk::CommandPool handle; + vk::CommandBuffers cmdbufs; +}; + CommandPool::CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device) : ResourcePool(master_semaphore, COMMAND_BUFFER_POOL_SIZE), device{device} {} diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h index 92d8a9f4d..b9cb3fb5d 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.h +++ b/src/video_core/renderer_vulkan/vk_command_pool.h @@ -25,10 +25,7 @@ public: VkCommandBuffer Commit(); private: - struct Pool { - vk::CommandPool handle; - vk::CommandBuffers cmdbufs; - }; + struct Pool; const VKDevice& device; std::vector pools; -- cgit v1.2.3 From ca26fd0f4297bc5cdf495c5304ed0bd9737f40b2 Mon Sep 17 00:00:00 2001 From: lat9nq Date: Fri, 25 Sep 2020 17:42:59 -0400 Subject: vk_stream_buffer: Fix initializing Vulkan with NVIDIA on Linux The previous fix only partially solved the issue, as only certain GPUs that needed 9 or less MiB subtracted would work (i.e. GTX 980 Ti, GT 730). This takes from DXVK's example to divide `heap_size` by 2 to determine `allocable_size`. 
Additionally tested on my Quadro K4200, which previously required setting it to 12 to boot. --- src/video_core/renderer_vulkan/vk_stream_buffer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 5218c875b..1b59612b9 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -120,7 +120,8 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { // Substract from the preferred heap size some bytes to avoid getting out of memory. const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; - const VkDeviceSize allocable_size = heap_size - 9 * 1024 * 1024; + // As per DXVK's example, using `heap_size / 2` + const VkDeviceSize allocable_size = heap_size / 2; buffer = device.GetLogical().CreateBuffer({ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, -- cgit v1.2.3 From d7843b8ef2449a6af0bfd31b32bddae35bf99f6b Mon Sep 17 00:00:00 2001 From: Matías Locatti Date: Wed, 30 Sep 2020 03:13:38 -0300 Subject: Remove ext_extended_dynamic_state blacklist Latest AMD 20.9.2 driver fixed this, there's no reason to keep it blocked, as the previous stable signed driver release doesn't include the extension.--- src/video_core/renderer_vulkan/vk_device.cpp | 8 -------- 1 file changed, 8 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 05e31f1de..3d8d3213d 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -388,14 +388,6 @@ bool VKDevice::Create() { CollectTelemetryParameters(); - if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR) { - // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but the field - // seems 
to be bugged. Blacklisting it for now. - LOG_WARNING(Render_Vulkan, - "Blacklisting AMD proprietary from VK_EXT_extended_dynamic_state"); - ext_extended_dynamic_state = false; - } - graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); -- cgit v1.2.3 From 2a24b1c9734a916e9a14579d4c550c84e83039b8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 2 Oct 2020 21:19:35 -0300 Subject: video_core: Enforce -Wunused-variable and -Wunused-but-set-variable --- src/video_core/CMakeLists.txt | 8 +++++++- src/video_core/engines/maxwell_dma.cpp | 2 -- src/video_core/renderer_opengl/gl_device.cpp | 1 - 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index da9e9fdda..2be455679 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -273,5 +273,11 @@ endif() if (MSVC) target_compile_options(video_core PRIVATE /we4267) else() - target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion -Werror=switch) + target_compile_options(video_core PRIVATE + -Werror=conversion + -Wno-error=sign-conversion + -Werror=switch + -Werror=unused-variable + -Werror=unused-but-set-variable + ) endif() diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index e88290754..8fa359d0a 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -114,8 +114,6 @@ void MaxwellDMA::CopyBlockLinearToPitch() { const u32 block_depth = src_params.block_size.depth; const size_t src_size = CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); - const size_t src_layer_size = - CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth); if (read_buffer.size() < src_size) { read_buffer.resize(src_size); diff --git a/src/video_core/renderer_opengl/gl_device.cpp 
b/src/video_core/renderer_opengl/gl_device.cpp index e7d95149f..a94e4f72e 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -193,7 +193,6 @@ bool IsASTCSupported() { Device::Device() : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { const std::string_view vendor = reinterpret_cast(glGetString(GL_VENDOR)); - const std::string_view renderer = reinterpret_cast(glGetString(GL_RENDERER)); const std::string_view version = reinterpret_cast(glGetString(GL_VERSION)); const std::vector extensions = GetExtensions(); -- cgit v1.2.3 From cd3e959f237352f863e16ce7ca94f837c4f611db Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 7 Oct 2020 17:13:20 -0300 Subject: renderer_vulkan/wrapper: Fix physical device sorting The old code had a sort function that was invalid and it didn't work as expected when the base vector had a different order (e.g. renderdoc was attached). This sorts devices as expected and fixes a debug assert on MSVC. --- src/video_core/renderer_vulkan/wrapper.cpp | 48 ++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 13 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 1fb14e190..2598440fb 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -17,21 +18,42 @@ namespace Vulkan::vk { namespace { +template +void SortPhysicalDevices(std::vector& devices, const InstanceDispatch& dld, + Func&& func) { + // Calling GetProperties calls Vulkan more than needed. But they are supposed to be cheap + // functions. 
+ std::stable_sort(devices.begin(), devices.end(), + [&dld, &func](VkPhysicalDevice lhs, VkPhysicalDevice rhs) { + return func(vk::PhysicalDevice(lhs, dld).GetProperties(), + vk::PhysicalDevice(rhs, dld).GetProperties()); + }); +} + +void SortPhysicalDevicesPerVendor(std::vector& devices, + const InstanceDispatch& dld, + std::initializer_list vendor_ids) { + for (auto it = vendor_ids.end(); it != vendor_ids.begin();) { + --it; + SortPhysicalDevices(devices, dld, [id = *it](const auto& lhs, const auto& rhs) { + return lhs.vendorID == id && rhs.vendorID != id; + }); + } +} + void SortPhysicalDevices(std::vector& devices, const InstanceDispatch& dld) { - std::stable_sort(devices.begin(), devices.end(), [&](auto lhs, auto rhs) { - // This will call Vulkan more than needed, but these calls are cheap. - const auto lhs_properties = vk::PhysicalDevice(lhs, dld).GetProperties(); - const auto rhs_properties = vk::PhysicalDevice(rhs, dld).GetProperties(); - - // Prefer discrete GPUs, Nvidia over AMD, AMD over Intel, Intel over the rest. - const bool preferred = - (lhs_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU && - rhs_properties.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) || - (lhs_properties.vendorID == 0x10DE && rhs_properties.vendorID != 0x10DE) || - (lhs_properties.vendorID == 0x1002 && rhs_properties.vendorID != 0x1002) || - (lhs_properties.vendorID == 0x8086 && rhs_properties.vendorID != 0x8086); - return !preferred; + // Sort by name, this will set a base and make GPUs with higher numbers appear first + // (e.g. GTX 1650 will intentionally be listed before a GTX 1080). 
+ SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) { + return std::string_view{lhs.deviceName} > std::string_view{rhs.deviceName}; + }); + // Prefer discrete over non-discrete + SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) { + return lhs.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU && + rhs.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU; }); + // Prefer Nvidia over AMD, AMD over Intel, Intel over the rest. + SortPhysicalDevicesPerVendor(devices, dld, {0x10DE, 0x1002, 0x8086}); } template -- cgit v1.2.3 From dffaffaac1eb633d5907202df1ca0dbf338a6095 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 7 Oct 2020 23:17:46 -0300 Subject: shader/texture: Implement CUBE texture type for TMML and fix arrays TMML takes an array argument that has no known meaning, this one appears as the first component in gpr8 followed by s, t and r. Skip this component when arrays are being used. Also implement CUBE texture types. - Used by Pikmin 3: Deluxe Demo. --- src/video_core/shader/decode/texture.cpp | 41 +++++++++++++++++--------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index a03b50e39..4e932a4b6 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -292,33 +292,36 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { break; } - std::vector coords; - - // TODO: Add coordinates for different samplers once other texture types are implemented. - switch (texture_type) { - case TextureType::Texture1D: - coords.push_back(GetRegister(instr.gpr8)); - break; - case TextureType::Texture2D: - coords.push_back(GetRegister(instr.gpr8.Value() + 0)); - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); - break; - default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast(texture_type)); + const u64 base_index = is_array ? 
1 : 0; + const u64 num_components = [texture_type] { + switch (texture_type) { + case TextureType::Texture1D: + return 1; + case TextureType::Texture2D: + return 2; + case TextureType::TextureCube: + return 3; + default: + UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast(texture_type)); + return 2; + } + }(); + // TODO: What's the array component used for? - // Fallback to interpreting as a 2D texture for now - coords.push_back(GetRegister(instr.gpr8.Value() + 0)); - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); + std::vector coords; + coords.reserve(num_components); + for (u64 component = 0; component < num_components; ++component) { + coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component)); } + u32 indexer = 0; for (u32 element = 0; element < 2; ++element) { if (!instr.tmml.IsComponentEnabled(element)) { continue; } - auto params = coords; MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); - SetTemporary(bb, indexer++, value); + Node value = Operation(OperationCode::TextureQueryLod, meta, coords); + SetTemporary(bb, indexer++, std::move(value)); } for (u32 i = 0; i < indexer; ++i) { SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); -- cgit v1.2.3 From 0120e5b1d97f1ebbdd23eed359804221eb697ad2 Mon Sep 17 00:00:00 2001 From: goldenx86 Date: Thu, 8 Oct 2020 21:17:08 -0300 Subject: vk_device: Block VK_EXT_extended_dynamic_state for RDNA devices RDNA devices seem to crash when using VK_EXT_extended_dynamic_state in the latest 20.9.2 proprietary Windows drivers. As a workaround, for now we block device names corresponding to current RDNA released products. 
--- src/video_core/renderer_vulkan/vk_device.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 3d8d3213d..1f057b43b 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -79,6 +79,21 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType } } +[[nodiscard]] bool IsRDNA(std::string_view device_name, VkDriverIdKHR driver_id) { + static constexpr std::array RDNA_DEVICES{ + "5700", + "5600", + "5500", + "5300", + }; + if (driver_id != VK_DRIVER_ID_AMD_PROPRIETARY_KHR) { + return false; + } + return std::any_of(RDNA_DEVICES.begin(), RDNA_DEVICES.end(), [device_name](const char* name) { + return device_name.find(name) != std::string_view::npos; + }); +} + std::unordered_map GetFormatProperties( vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { static constexpr std::array formats{ @@ -388,6 +403,15 @@ bool VKDevice::Create() { CollectTelemetryParameters(); + if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) { + // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it + // seems to cause stability issues + LOG_WARNING( + Render_Vulkan, + "Blacklisting AMD proprietary on RDNA devices from VK_EXT_extended_dynamic_state"); + ext_extended_dynamic_state = false; + } + graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); -- cgit v1.2.3 From e1600b0962c78302b05d4b98d75245b980a03831 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 2 Oct 2020 21:24:22 -0300 Subject: video_core: Enforce -Wclass-memaccess --- src/video_core/CMakeLists.txt | 1 + src/video_core/engines/shader_header.h | 13 ++++++------- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git 
a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2be455679..3df54816d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -279,5 +279,6 @@ else() -Werror=switch -Werror=unused-variable -Werror=unused-but-set-variable + -Werror=class-memaccess ) endif() diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index 72e2a33d5..ceec05459 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h @@ -41,30 +41,30 @@ struct Header { BitField<26, 1, u32> does_load_or_store; BitField<27, 1, u32> does_fp64; BitField<28, 4, u32> stream_out_mask; - } common0{}; + } common0; union { BitField<0, 24, u32> shader_local_memory_low_size; BitField<24, 8, u32> per_patch_attribute_count; - } common1{}; + } common1; union { BitField<0, 24, u32> shader_local_memory_high_size; BitField<24, 8, u32> threads_per_input_primitive; - } common2{}; + } common2; union { BitField<0, 24, u32> shader_local_memory_crs_size; BitField<24, 4, OutputTopology> output_topology; BitField<28, 4, u32> reserved; - } common3{}; + } common3; union { BitField<0, 12, u32> max_output_vertices; BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. BitField<20, 4, u32> reserved; BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. 
- } common4{}; + } common4; union { struct { @@ -145,7 +145,7 @@ struct Header { } } ps; - std::array raw{}; + std::array raw; }; u64 GetLocalMemorySize() const { @@ -153,7 +153,6 @@ struct Header { (common2.shader_local_memory_high_size << 24)); } }; - static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); } // namespace Tegra::Shader -- cgit v1.2.3 From e4e0abc418de022903a084c9409cd502ce6c5629 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Oct 2020 21:33:27 -0300 Subject: vk_graphics_pipeline: Manage primitive topology as fixed state Vulkan has requirements for primitive topologies that don't play nicely with yuzu's. Since it's only 4 bits, we can move it to fixed state without changing the size of the pipeline key. - Fixes a regression on recent Nvidia drivers on Fire Emblem: Three Houses. --- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 3 +-- src/video_core/renderer_vulkan/fixed_pipeline_state.h | 10 +++------- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 5 ++--- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 3 +-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 11 ----------- src/video_core/renderer_vulkan/vk_rasterizer.h | 1 - 6 files changed, 7 insertions(+), 26 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 81a39a3b8..da5c550ea 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -58,6 +58,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); logic_op.Assign(PackLogicOp(regs.logic_op.operation)); rasterize_enable.Assign(regs.rasterize_enable != 0 ? 
1 : 0); + topology.Assign(regs.draw.topology); std::memcpy(&point_size, ®s.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast @@ -131,7 +132,6 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size } void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { - const u32 topology_index = static_cast(regs.draw.topology.Value()); u32 packed_front_face = PackFrontFace(regs.front_face); if (regs.screen_y_control.triangle_rast_flip != 0) { // Flip front face @@ -161,7 +161,6 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { depth_test_enable.Assign(regs.depth_test_enable); front_face.Assign(packed_front_face); depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); - topology.Assign(topology_index); cull_face.Assign(PackCullFace(regs.cull_face)); cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index cdcbb65f5..2c18eeaae 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -150,9 +150,8 @@ struct FixedPipelineState { }; union { u32 raw2; - BitField<0, 4, u32> topology; - BitField<4, 2, u32> cull_face; - BitField<6, 1, u32> cull_enable; + BitField<0, 2, u32> cull_face; + BitField<2, 1, u32> cull_enable; }; std::array vertex_bindings; @@ -169,10 +168,6 @@ struct FixedPipelineState { Maxwell::FrontFace FrontFace() const noexcept { return UnpackFrontFace(front_face.Value()); } - - constexpr Maxwell::PrimitiveTopology Topology() const noexcept { - return static_cast(topology.Value()); - } }; union { @@ -190,6 +185,7 @@ struct FixedPipelineState { BitField<18, 1, u32> logic_op_enable; BitField<19, 4, u32> logic_op; BitField<23, 1, u32> rasterize_enable; + BitField<24, 4, Maxwell::PrimitiveTopology> topology; }; u32 point_size; std::array binding_divisors; diff --git 
a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a4b9e7ef5..696eaeb5f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -261,12 +261,12 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa vertex_input_ci.pNext = &input_divisor_ci; } - const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology()); + const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology()), + .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), .primitiveRestartEnable = state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_topology), }; @@ -400,7 +400,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa static constexpr std::array extended{ VK_DYNAMIC_STATE_CULL_MODE_EXT, VK_DYNAMIC_STATE_FRONT_FACE_EXT, - VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT, VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 5c038f4bc..dedc9c466 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -331,8 +331,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) { std::pair> VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { Specialization specialization; - if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points || - 
device.IsExtExtendedDynamicStateSupported()) { + if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { float point_size; std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); specialization.point_size = point_size; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f3c2483c8..e0fb8693f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -948,7 +948,6 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateDepthWriteEnable(regs); UpdateDepthCompareOp(regs); UpdateFrontFace(regs); - UpdatePrimitiveTopology(regs); UpdateStencilOp(regs); UpdateStencilTestEnable(regs); } @@ -1418,16 +1417,6 @@ void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) { [front_face](vk::CommandBuffer cmdbuf) { cmdbuf.SetFrontFaceEXT(front_face); }); } -void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) { - const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value(); - if (!state_tracker.ChangePrimitiveTopology(primitive_topology)) { - return; - } - scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) { - cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology)); - }); -} - void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchStencilOp()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b47c8fc13..237e51fa4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -259,7 +259,6 @@ private: void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs); - void 
UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); -- cgit v1.2.3 From f21a189148c7c306885b26801e72fb84f867e88b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Oct 2020 03:15:50 -0300 Subject: gl_arb_decompiler: Implement robust buffer operations This emulates the behavior we get on GLSL with regular SSBOs with a pointer + length pair. It aims to be consistent with the crashes we might get. Out of bounds stores are ignored. Atomics are ignored and return zero. Reads return zero. --- .../renderer_opengl/gl_arb_decompiler.cpp | 31 ++++++++++---- src/video_core/renderer_opengl/gl_rasterizer.cpp | 47 +++++++++++----------- src/video_core/renderer_opengl/gl_rasterizer.h | 9 ++++- 3 files changed, 54 insertions(+), 33 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index b7e9ed2e9..f4db62787 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -376,9 +376,11 @@ private: std::string temporary = AllocTemporary(); std::string address; std::string_view opname; + bool robust = false; if (const auto gmem = std::get_if(&*operation[0])) { address = GlobalMemoryPointer(*gmem); opname = "ATOM"; + robust = true; } else if (const auto smem = std::get_if(&*operation[0])) { address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); opname = "ATOMS"; @@ -386,7 +388,15 @@ private: UNREACHABLE(); return "{0, 0, 0, 0}"; } + if (robust) { + AddLine("IF NE.x;"); + } AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); + if (robust) { + AddLine("ELSE;"); + AddLine("MOV.S {}, 0;", temporary); + AddLine("ENDIF;"); + } return temporary; } @@ -980,10 +990,9 @@ void ARBDecompiler::DeclareLocalMemory() { } void 
ARBDecompiler::DeclareGlobalMemory() { - const std::size_t num_entries = ir.GetGlobalMemory().size(); + const size_t num_entries = ir.GetGlobalMemory().size(); if (num_entries > 0) { - const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2; - AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1); + AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); } } @@ -1363,7 +1372,8 @@ std::string ARBDecompiler::Visit(const Node& node) { if (const auto gmem = std::get_if(&*node)) { std::string temporary = AllocTemporary(); - AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem)); + AddLine("MOV {}, 0;", temporary); + AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); return temporary; } @@ -1441,18 +1451,21 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) { } std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { + // Read a bindless SSBO, return its address and set CC accordingly + // address = c[binding].xy + // length = c[binding].z const u32 binding = global_memory_names.at(gmem.GetDescriptor()); - const char result_swizzle = binding % 2 == 0 ? 
'x' : 'y'; const std::string pointer = AllocLongVectorTemporary(); std::string temporary = AllocTemporary(); - const u32 local_index = binding / 2; - AddLine("PK64.U {}, c[{}];", pointer, local_index); + AddLine("PK64.U {}, c[{}];", pointer, binding); AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), Visit(gmem.GetBaseAddress())); AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); - AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer); + AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); + // Compare offset to length and set CC + AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); return fmt::format("{}.x", pointer); } @@ -1552,7 +1565,9 @@ std::string ARBDecompiler::Assign(Operation operation) { ResetTemporaries(); return {}; } else if (const auto gmem = std::get_if(&*dest)) { + AddLine("IF NE.x;"); AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); + AddLine("ENDIF;"); ResetTemporaries(); return {}; } else { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index bbb2eb17c..36bf92808 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -139,16 +139,12 @@ void oglEnable(GLenum cap, bool state) { (state ? 
glEnable : glDisable)(cap); } -void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) { - if (num_entries == 0) { +void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) { + if (num_ssbos == 0) { return; } - if (num_entries % 2 == 1) { - pointers[num_entries] = 0; - } - const GLsizei num_vectors = static_cast((num_entries + 1) / 2); - glProgramLocalParametersI4uivNV(target, 0, num_vectors, - reinterpret_cast(pointers)); + glProgramLocalParametersI4uivNV(target, 0, static_cast(num_ssbos), + reinterpret_cast(ssbos)); } } // Anonymous namespace @@ -900,11 +896,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, } void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { - static constexpr std::array PARAMETER_LUT = { - GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, + static constexpr std::array PARAMETER_LUT{ + GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, - GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV}; - + GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV, + }; MICROPROFILE_SCOPE(OpenGL_UBO); const auto& stages = maxwell3d.state.shader_stages; const auto& shader_stage = stages[stage_index]; @@ -1007,8 +1003,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; const auto& entries{shader->GetEntries().global_memory_entries}; - std::array pointers; - ASSERT(entries.size() < pointers.size()); + std::array ssbos; + ASSERT(entries.size() < ssbos.size()); const bool assembly_shaders = device.UseAssemblyShaders(); u32 binding = assembly_shaders ? 
0 : device.GetBaseBindings(stage_index).shader_storage_buffer; @@ -1016,11 +1012,11 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; const GPUVAddr gpu_addr{gpu_memory.Read(addr)}; const u32 size{gpu_memory.Read(addr + 8)}; - SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); + SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); ++binding; } if (assembly_shaders) { - UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size()); + UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size()); } } @@ -1028,29 +1024,32 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; const auto& entries{kernel->GetEntries().global_memory_entries}; - std::array pointers; - ASSERT(entries.size() < pointers.size()); + std::array ssbos; + ASSERT(entries.size() < ssbos.size()); u32 binding = 0; for (const auto& entry : entries) { const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; const GPUVAddr gpu_addr{gpu_memory.Read(addr)}; const u32 size{gpu_memory.Read(addr + 8)}; - SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); + SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); ++binding; } if (device.UseAssemblyShaders()) { - UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size()); + UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size()); } } void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, - GPUVAddr gpu_addr, std::size_t size, - GLuint64EXT* pointer) { - const std::size_t alignment{device.GetShaderStorageBufferAlignment()}; + GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) { + const size_t alignment{device.GetShaderStorageBufferAlignment()}; const auto info = 
buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); if (device.UseAssemblyShaders()) { - *pointer = info.address + info.offset; + *ssbo = BindlessSSBO{ + .address = static_cast(info.address + info.offset), + .length = static_cast(size), + .padding = 0, + }; } else { glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, static_cast(size)); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index f451404b2..1d0f585fa 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -53,6 +53,13 @@ namespace OpenGL { struct ScreenInfo; struct DrawParameters; +struct BindlessSSBO { + GLuint64EXT address; + GLsizei length; + GLsizei padding; +}; +static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128); + class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { public: explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, @@ -126,7 +133,7 @@ private: /// Configures a global memory buffer. void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, - std::size_t size, GLuint64EXT* pointer); + size_t size, BindlessSSBO* ssbo); /// Configures the current textures to use for the draw command. void SetupDrawTextures(std::size_t stage_index, Shader* shader); -- cgit v1.2.3 From 678d012c2c3ce7b6bac136576d15f23b2d4b05a8 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 20 Oct 2020 19:24:28 -0400 Subject: video_core: Conditionally activate relevant compiler warnings These compiler flags aren't shared with clang, so specifying these flags unconditionally can lead to a bit of warning spam. While we're in the area, we can also enable -Wunused-but-set-parameter given this is almost always a bug.
--- src/video_core/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3df54816d..77ebac19f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -278,7 +278,9 @@ else() -Wno-error=sign-conversion -Werror=switch -Werror=unused-variable - -Werror=unused-but-set-variable - -Werror=class-memaccess + + $<$:-Werror=class-memaccess> + $<$:-Werror=unused-but-set-parameter> + $<$:-Werror=unused-but-set-variable> ) endif() -- cgit v1.2.3 From eb67a45ca82bc01ac843c853fd3c17f2a90e0250 Mon Sep 17 00:00:00 2001 From: ameerj Date: Mon, 26 Oct 2020 23:07:36 -0400 Subject: video_core: NVDEC Implementation This commit aims to implement the NVDEC (Nvidia Decoder) functionality, with video frame decoding being handled by the FFmpeg library. The process begins with Ioctl commands being sent to the NVDEC and VIC (Video Image Composer) emulated devices. These allocate the necessary GPU buffers for the frame data, along with providing information on the incoming video data. A Submit command then signals the GPU to process and decode the frame data. To decode the frame, the respective codec's header must be manually composed from the information provided by NVDEC, then sent with the raw frame data to the ffmpeg library. Currently, H264 and VP9 are supported, with VP9 having some minor artifacting issues related mainly to the reference frame composition in its uncompressed header. Async GPU is not properly implemented at the moment. 
Co-Authored-By: David <25727384+ogniK5377@users.noreply.github.com> --- src/video_core/CMakeLists.txt | 26 + src/video_core/cdma_pusher.cpp | 171 ++++ src/video_core/cdma_pusher.h | 138 +++ src/video_core/command_classes/codecs/codec.cpp | 114 +++ src/video_core/command_classes/codecs/codec.h | 68 ++ src/video_core/command_classes/codecs/h264.cpp | 276 ++++++ src/video_core/command_classes/codecs/h264.h | 130 +++ src/video_core/command_classes/codecs/vp9.cpp | 1010 +++++++++++++++++++++ src/video_core/command_classes/codecs/vp9.h | 216 +++++ src/video_core/command_classes/codecs/vp9_types.h | 369 ++++++++ src/video_core/command_classes/host1x.cpp | 39 + src/video_core/command_classes/host1x.h | 78 ++ src/video_core/command_classes/nvdec.cpp | 56 ++ src/video_core/command_classes/nvdec.h | 39 + src/video_core/command_classes/nvdec_common.h | 48 + src/video_core/command_classes/sync_manager.cpp | 60 ++ src/video_core/command_classes/sync_manager.h | 64 ++ src/video_core/command_classes/vic.cpp | 180 ++++ src/video_core/command_classes/vic.h | 110 +++ src/video_core/gpu.cpp | 11 +- src/video_core/gpu.h | 23 +- src/video_core/gpu_asynch.cpp | 26 +- src/video_core/gpu_asynch.h | 3 +- src/video_core/gpu_synch.cpp | 18 +- src/video_core/gpu_synch.h | 3 +- src/video_core/gpu_thread.cpp | 16 +- src/video_core/gpu_thread.h | 19 +- src/video_core/memory_manager.cpp | 12 +- src/video_core/memory_manager.h | 5 +- src/video_core/video_core.cpp | 5 +- 30 files changed, 3311 insertions(+), 22 deletions(-) create mode 100644 src/video_core/cdma_pusher.cpp create mode 100644 src/video_core/cdma_pusher.h create mode 100644 src/video_core/command_classes/codecs/codec.cpp create mode 100644 src/video_core/command_classes/codecs/codec.h create mode 100644 src/video_core/command_classes/codecs/h264.cpp create mode 100644 src/video_core/command_classes/codecs/h264.h create mode 100644 src/video_core/command_classes/codecs/vp9.cpp create mode 100644 
src/video_core/command_classes/codecs/vp9.h create mode 100644 src/video_core/command_classes/codecs/vp9_types.h create mode 100644 src/video_core/command_classes/host1x.cpp create mode 100644 src/video_core/command_classes/host1x.h create mode 100644 src/video_core/command_classes/nvdec.cpp create mode 100644 src/video_core/command_classes/nvdec.h create mode 100644 src/video_core/command_classes/nvdec_common.h create mode 100644 src/video_core/command_classes/sync_manager.cpp create mode 100644 src/video_core/command_classes/sync_manager.h create mode 100644 src/video_core/command_classes/vic.cpp create mode 100644 src/video_core/command_classes/vic.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 77ebac19f..fdfc885fc 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -5,6 +5,24 @@ add_library(video_core STATIC buffer_cache/buffer_cache.h buffer_cache/map_interval.cpp buffer_cache/map_interval.h + cdma_pusher.cpp + cdma_pusher.h + command_classes/codecs/codec.cpp + command_classes/codecs/codec.h + command_classes/codecs/h264.cpp + command_classes/codecs/h264.h + command_classes/codecs/vp9.cpp + command_classes/codecs/vp9.h + command_classes/codecs/vp9_types.h + command_classes/host1x.cpp + command_classes/host1x.h + command_classes/nvdec.cpp + command_classes/nvdec.h + command_classes/nvdec_common.h + command_classes/sync_manager.cpp + command_classes/sync_manager.h + command_classes/vic.cpp + command_classes/vic.h compatible_formats.cpp compatible_formats.h dirty_flags.cpp @@ -250,6 +268,14 @@ create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) target_link_libraries(video_core PRIVATE glad xbyak) +if (MSVC) + target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR}) + target_link_libraries(video_core PUBLIC ${FFMPEG_LIBRARY_DIR}/swscale.lib ${FFMPEG_LIBRARY_DIR}/avcodec.lib ${FFMPEG_LIBRARY_DIR}/avutil.lib) +else() 
+ target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR}) + target_link_libraries(video_core PRIVATE ${FFMPEG_LIBRARIES}) +endif() + add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp new file mode 100644 index 000000000..d774db107 --- /dev/null +++ b/src/video_core/cdma_pusher.cpp @@ -0,0 +1,171 @@ +// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+// + +#include "command_classes/host1x.h" +#include "command_classes/nvdec.h" +#include "command_classes/vic.h" +#include "common/bit_util.h" +#include "video_core/cdma_pusher.h" +#include "video_core/command_classes/nvdec_common.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" + +namespace Tegra { +CDmaPusher::CDmaPusher(GPU& gpu) + : gpu(gpu), nvdec_processor(std::make_shared(gpu)), + vic_processor(std::make_unique(gpu, nvdec_processor)), + host1x_processor(std::make_unique(gpu)), + nvdec_sync(std::make_unique(gpu)), + vic_sync(std::make_unique(gpu)) {} + +CDmaPusher::~CDmaPusher() = default; + +void CDmaPusher::Push(ChCommandHeaderList&& entries) { + cdma_queue.push(std::move(entries)); +} + +void CDmaPusher::DispatchCalls() { + while (!cdma_queue.empty()) { + Step(); + } +} + +void CDmaPusher::Step() { + const auto entries{cdma_queue.front()}; + cdma_queue.pop(); + + std::vector values(entries.size()); + std::memcpy(values.data(), entries.data(), entries.size() * sizeof(u32)); + + for (const u32 value : values) { + if (mask != 0) { + const u32 lbs = Common::CountTrailingZeroes32(mask); + mask &= ~(1U << lbs); + ExecuteCommand(static_cast(offset + lbs), value); + continue; + } else if (count != 0) { + --count; + ExecuteCommand(static_cast(offset), value); + if (incrementing) { + ++offset; + } + continue; + } + const auto mode = static_cast((value >> 28) & 0xf); + switch (mode) { + case ChSubmissionMode::SetClass: { + mask = value & 0x3f; + offset = (value >> 16) & 0xfff; + current_class = static_cast((value >> 6) & 0x3ff); + break; + } + case ChSubmissionMode::Incrementing: + case ChSubmissionMode::NonIncrementing: + count = value & 0xffff; + offset = (value >> 16) & 0xfff; + incrementing = mode == ChSubmissionMode::Incrementing; + break; + case ChSubmissionMode::Mask: + mask = value & 0xffff; + offset = (value >> 16) & 0xfff; + break; + case ChSubmissionMode::Immediate: { + const u32 data = 
value & 0xfff; + offset = (value >> 16) & 0xfff; + ExecuteCommand(static_cast(offset), data); + break; + } + default: + UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", static_cast(mode)); + break; + } + } +} + +void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { + switch (current_class) { + case ChClassId::NvDec: + ThiStateWrite(nvdec_thi_state, offset, {data}); + switch (static_cast(offset)) { + case ThiMethod::IncSyncpt: { + LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method"); + const auto syncpoint_id = static_cast(data & 0xFF); + const auto cond = static_cast((data >> 8) & 0xFF); + if (cond == 0) { + nvdec_sync->Increment(syncpoint_id); + } else { + nvdec_sync->IncrementWhenDone(static_cast(current_class), syncpoint_id); + nvdec_sync->SignalDone(syncpoint_id); + } + break; + } + case ThiMethod::SetMethod1: + LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", + static_cast(nvdec_thi_state.method_0)); + nvdec_processor->ProcessMethod( + static_cast(nvdec_thi_state.method_0), {data}); + break; + default: + break; + } + break; + case ChClassId::GraphicsVic: + ThiStateWrite(vic_thi_state, static_cast(offset), {data}); + switch (static_cast(offset)) { + case ThiMethod::IncSyncpt: { + LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method"); + const auto syncpoint_id = static_cast(data & 0xFF); + const auto cond = static_cast((data >> 8) & 0xFF); + if (cond == 0) { + vic_sync->Increment(syncpoint_id); + } else { + vic_sync->IncrementWhenDone(static_cast(current_class), syncpoint_id); + vic_sync->SignalDone(syncpoint_id); + } + break; + } + case ThiMethod::SetMethod1: + LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})", + static_cast(vic_thi_state.method_0), data); + vic_processor->ProcessMethod(static_cast(vic_thi_state.method_0), + {data}); + break; + default: + break; + } + break; + case ChClassId::Host1x: + // This device is mainly for syncpoint synchronization + LOG_DEBUG(Service_NVDRV, "Host1X Class Method"); +
host1x_processor->ProcessMethod(static_cast(offset), {data}); + break; + default: + UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast(current_class)); + break; + } +} + +void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector& arguments) { + u8* const state_offset = reinterpret_cast(&state) + sizeof(u32) * offset; + std::memcpy(state_offset, arguments.data(), sizeof(u32) * arguments.size()); +} + +} // namespace Tegra diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h new file mode 100644 index 000000000..982f309c5 --- /dev/null +++ b/src/video_core/cdma_pusher.h @@ -0,0 +1,138 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "video_core/command_classes/sync_manager.h" + +namespace Tegra { + +class GPU; +class Nvdec; +class Vic; +class Host1x; + +enum class ChSubmissionMode : u32 { + SetClass = 0, + Incrementing = 1, + NonIncrementing = 2, + Mask = 3, + Immediate = 4, + Restart = 5, + Gather = 6, +}; + +enum class ChClassId : u32 { + NoClass = 0x0, + Host1x = 0x1, + VideoEncodeMpeg = 0x20, + VideoEncodeNvEnc = 0x21, + VideoStreamingVi = 0x30, + VideoStreamingIsp = 0x32, + VideoStreamingIspB = 0x34, + VideoStreamingViI2c = 0x36, + GraphicsVic = 0x5d, + Graphics3D = 0x60, + GraphicsGpu = 0x61, + Tsec = 0xe0, + TsecB = 0xe1, + NvJpg = 0xc0, + NvDec = 0xf0 +}; + +enum class ChMethod : u32 { + Empty = 0, + SetMethod = 0x10, + SetData = 0x11, +}; + +union ChCommandHeader { + u32 raw; + BitField<0, 16, u32> value; + BitField<16, 12, ChMethod> method_offset; + BitField<28, 4, ChSubmissionMode> submission_mode; +}; +static_assert(sizeof(ChCommandHeader) == sizeof(u32), "ChCommand header is an invalid size"); + +struct ChCommand { + ChClassId class_id{}; + int method_offset{}; + std::vector 
arguments; +}; + +using ChCommandHeaderList = std::vector; +using ChCommandList = std::vector; + +struct ThiRegisters { + u32_le increment_syncpt{}; + INSERT_PADDING_WORDS(1); + u32_le increment_syncpt_error{}; + u32_le ctx_switch_incremement_syncpt{}; + INSERT_PADDING_WORDS(4); + u32_le ctx_switch{}; + INSERT_PADDING_WORDS(1); + u32_le ctx_syncpt_eof{}; + INSERT_PADDING_WORDS(5); + u32_le method_0{}; + u32_le method_1{}; + INSERT_PADDING_WORDS(12); + u32_le int_status{}; + u32_le int_mask{}; +}; + +enum class ThiMethod : u32 { + IncSyncpt = offsetof(ThiRegisters, increment_syncpt) / sizeof(u32), + SetMethod0 = offsetof(ThiRegisters, method_0) / sizeof(u32), + SetMethod1 = offsetof(ThiRegisters, method_1) / sizeof(u32), +}; + +class CDmaPusher { +public: + explicit CDmaPusher(GPU& gpu); + ~CDmaPusher(); + + /// Push NVDEC command buffer entries into queue + void Push(ChCommandHeaderList&& entries); + + /// Process queued command buffer entries + void DispatchCalls(); + + /// Process one queue element + void Step(); + + /// Invoke command class devices to execute the command based on the current state + void ExecuteCommand(u32 offset, u32 data); + +private: + /// Write arguments value to the ThiRegisters member at the specified offset + void ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector& arguments); + + GPU& gpu; + + std::shared_ptr nvdec_processor; + std::unique_ptr vic_processor; + std::unique_ptr host1x_processor; + std::unique_ptr nvdec_sync; + std::unique_ptr vic_sync; + ChClassId current_class{}; + ThiRegisters vic_thi_state{}; + ThiRegisters nvdec_thi_state{}; + + s32 count{}; + s32 offset{}; + s32 mask{}; + bool incrementing{}; + + // Queue of command lists to be processed + std::queue cdma_queue; +}; + +} // namespace Tegra diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp new file mode 100644 index 000000000..2df410be8 --- /dev/null +++ 
b/src/video_core/command_classes/codecs/codec.cpp @@ -0,0 +1,114 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include "common/assert.h" +#include "video_core/command_classes/codecs/codec.h" +#include "video_core/command_classes/codecs/h264.h" +#include "video_core/command_classes/codecs/vp9.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" + +extern "C" { +#include +} + +namespace Tegra { + +Codec::Codec(GPU& gpu_) + : gpu(gpu_), h264_decoder(std::make_unique(gpu)), + vp9_decoder(std::make_unique(gpu)) {} + +Codec::~Codec() { + if (!initialized) { + return; + } + // Free libav memory + avcodec_send_packet(av_codec_ctx, nullptr); + avcodec_receive_frame(av_codec_ctx, av_frame); + avcodec_flush_buffers(av_codec_ctx); + + av_frame_unref(av_frame); + av_free(av_frame); + avcodec_close(av_codec_ctx); +} + +void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { + LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast(codec)); + current_codec = codec; +} + +void Codec::StateWrite(u32 offset, u64 arguments) { + u8* const state_offset = reinterpret_cast(&state) + offset * sizeof(u64); + std::memcpy(state_offset, &arguments, sizeof(u64)); +} + +void Codec::Decode() { + bool is_first_frame = false; + + if (!initialized) { + if (current_codec == NvdecCommon::VideoCodec::H264) { + av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); + } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { + av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); + } else { + LOG_ERROR(Service_NVDRV, "Unknown video codec {}", static_cast(current_codec)); + return; + } + + av_codec_ctx = avcodec_alloc_context3(av_codec); + av_frame = av_frame_alloc(); + av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); + + // TODO(ameerj): libavcodec gpu hw acceleration + + const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); + 
if (av_error < 0) { + LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); + av_frame_unref(av_frame); + av_free(av_frame); + avcodec_close(av_codec_ctx); + return; + } + initialized = true; + is_first_frame = true; + } + bool vp9_hidden_frame = false; + + AVPacket packet{}; + av_init_packet(&packet); + std::vector frame_data; + + if (current_codec == NvdecCommon::VideoCodec::H264) { + frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame); + } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { + frame_data = vp9_decoder->ComposeFrameHeader(state); + vp9_hidden_frame = vp9_decoder->WasFrameHidden(); + } + + packet.data = frame_data.data(); + packet.size = static_cast(frame_data.size()); + + avcodec_send_packet(av_codec_ctx, &packet); + + if (!vp9_hidden_frame) { + // Only receive/store visible frames + avcodec_receive_frame(av_codec_ctx, av_frame); + } +} + +AVFrame* Codec::GetCurrentFrame() { + return av_frame; +} + +const AVFrame* Codec::GetCurrentFrame() const { + return av_frame; +} + +NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { + return current_codec; +} + +} // namespace Tegra diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h new file mode 100644 index 000000000..2e56daf29 --- /dev/null +++ b/src/video_core/command_classes/codecs/codec.h @@ -0,0 +1,68 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "video_core/command_classes/nvdec_common.h" + +extern "C" { +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic ignored "-Wconversion" +#endif +#include +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif +} + +namespace Tegra { +class GPU; +struct VicRegisters; + +namespace Decoder { +class H264; +class VP9; +} // namespace Decoder + +class Codec { +public: + explicit Codec(GPU& gpu); + ~Codec(); + + /// Sets NVDEC video stream codec + void SetTargetCodec(NvdecCommon::VideoCodec codec); + + /// Populate NvdecRegisters state with argument value at the provided offset + void StateWrite(u32 offset, u64 arguments); + + /// Call decoders to construct headers, decode AVFrame with ffmpeg + void Decode(); + + /// Returns most recently decoded frame + AVFrame* GetCurrentFrame(); + const AVFrame* GetCurrentFrame() const; + + /// Returns the value of current_codec + NvdecCommon::VideoCodec GetCurrentCodec() const; + +private: + bool initialized{}; + NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None}; + + AVCodec* av_codec{nullptr}; + AVCodecContext* av_codec_ctx{nullptr}; + AVFrame* av_frame{nullptr}; + + GPU& gpu; + std::unique_ptr h264_decoder; + std::unique_ptr vp9_decoder; + + NvdecCommon::NvdecRegisters state{}; +}; + +} // namespace Tegra diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp new file mode 100644 index 000000000..1a39f7b23 --- /dev/null +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -0,0 +1,276 @@ +// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to 
use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// + +#include "common/bit_util.h" +#include "video_core/command_classes/codecs/h264.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" + +namespace Tegra::Decoder { +H264::H264(GPU& gpu_) : gpu(gpu_) {} + +H264::~H264() = default; + +std::vector& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) { + H264DecoderContext context{}; + gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); + + const s32 frame_number = static_cast((context.h264_parameter_set.flags >> 46) & 0x1ffff); + if (!is_first_frame && frame_number != 0) { + frame.resize(context.frame_data_size); + + gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); + } else { + /// Encode header + H264BitWriter writer{}; + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(7, 5); + writer.WriteU(100, 8); + writer.WriteU(0, 8); + writer.WriteU(31, 8); + writer.WriteUe(0); + const s32 chroma_format_idc = (context.h264_parameter_set.flags >> 12) & 0x3; + writer.WriteUe(chroma_format_idc); + if (chroma_format_idc == 3) { + 
writer.WriteBit(false); + } + + writer.WriteUe(0); + writer.WriteUe(0); + writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag + writer.WriteBit(false); // Scaling matrix present flag + + const s32 order_cnt_type = static_cast((context.h264_parameter_set.flags >> 14) & 3); + writer.WriteUe(static_cast((context.h264_parameter_set.flags >> 8) & 0xf)); + writer.WriteUe(order_cnt_type); + if (order_cnt_type == 0) { + writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt); + } else if (order_cnt_type == 1) { + writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); + + writer.WriteSe(0); + writer.WriteSe(0); + writer.WriteUe(0); + } + + const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units / + (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); + + writer.WriteUe(16); + writer.WriteBit(false); + writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); + writer.WriteUe(pic_height - 1); + writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); + + if (!context.h264_parameter_set.frame_mbs_only_flag) { + writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0); + } + + writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0); + writer.WriteBit(false); // Frame cropping flag + writer.WriteBit(false); // VUI parameter present flag + + writer.End(); + + // H264 PPS + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(8, 5); + + writer.WriteUe(0); + writer.WriteUe(0); + + writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag); + writer.WriteBit(false); + writer.WriteUe(0); + writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); + writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); + writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0); + writer.WriteU(static_cast((context.h264_parameter_set.flags >> 32) & 0x3), 2); + s32 pic_init_qp = 
static_cast((context.h264_parameter_set.flags >> 16) & 0x3f); + pic_init_qp = (pic_init_qp << 26) >> 26; + writer.WriteSe(pic_init_qp); + writer.WriteSe(0); + s32 chroma_qp_index_offset = + static_cast((context.h264_parameter_set.flags >> 22) & 0x1f); + chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27; + + writer.WriteSe(chroma_qp_index_offset); + writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0); + writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0); + writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0); + writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); + + writer.WriteBit(true); + + for (s32 index = 0; index < 6; index++) { + writer.WriteBit(true); + const auto matrix_x4 = + std::vector(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end()); + writer.WriteScalingList(matrix_x4, index * 16, 16); + } + + if (context.h264_parameter_set.transform_8x8_mode_flag) { + for (s32 index = 0; index < 2; index++) { + writer.WriteBit(true); + const auto matrix_x8 = std::vector(context.scaling_matrix_8.begin(), + context.scaling_matrix_8.end()); + + writer.WriteScalingList(matrix_x8, index * 64, 64); + } + } + + s32 chroma_qp_index_offset2 = + static_cast((context.h264_parameter_set.flags >> 27) & 0x1f); + chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27; + + writer.WriteSe(chroma_qp_index_offset2); + + writer.End(); + + const auto& encoded_header = writer.GetByteArray(); + frame.resize(encoded_header.size() + context.frame_data_size); + std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); + + gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, + frame.data() + encoded_header.size(), + context.frame_data_size); + } + + return frame; +} + +H264BitWriter::H264BitWriter() = default; + +H264BitWriter::~H264BitWriter() = default; + +void H264BitWriter::WriteU(s32 value, s32 value_sz) { + WriteBits(value, value_sz); +} + 
+void H264BitWriter::WriteSe(s32 value) { + WriteExpGolombCodedInt(value); +} + +void H264BitWriter::WriteUe(s32 value) { + WriteExpGolombCodedUInt((u32)value); +} + +void H264BitWriter::End() { + WriteBit(true); + Flush(); +} + +void H264BitWriter::WriteBit(bool state) { + WriteBits(state ? 1 : 0, 1); +} + +void H264BitWriter::WriteScalingList(const std::vector& list, s32 start, s32 count) { + std::vector scan(count); + if (count == 16) { + std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); + } else { + std::memcpy(scan.data(), zig_zag_direct.data(), scan.size()); + } + u8 last_scale = 8; + + for (s32 index = 0; index < count; index++) { + const u8 value = list[start + scan[index]]; + const s32 delta_scale = static_cast(value - last_scale); + + WriteSe(delta_scale); + + last_scale = value; + } +} + +std::vector& H264BitWriter::GetByteArray() { + return byte_array; +} + +const std::vector& H264BitWriter::GetByteArray() const { + return byte_array; +} + +void H264BitWriter::WriteBits(s32 value, s32 bit_count) { + s32 value_pos = 0; + + s32 remaining = bit_count; + + while (remaining > 0) { + s32 copy_size = remaining; + + const s32 free_bits = GetFreeBufferBits(); + + if (copy_size > free_bits) { + copy_size = free_bits; + } + + const s32 mask = (1 << copy_size) - 1; + + const s32 src_shift = (bit_count - value_pos) - copy_size; + const s32 dst_shift = (buffer_size - buffer_pos) - copy_size; + + buffer |= ((value >> src_shift) & mask) << dst_shift; + + value_pos += copy_size; + buffer_pos += copy_size; + remaining -= copy_size; + } +} + +void H264BitWriter::WriteExpGolombCodedInt(s32 value) { + const s32 sign = value <= 0 ? 
0 : 1; + if (value < 0) { + value = -value; + } + value = (value << 1) - sign; + WriteExpGolombCodedUInt(value); +} + +void H264BitWriter::WriteExpGolombCodedUInt(u32 value) { + const s32 size = 32 - Common::CountLeadingZeroes32(static_cast(value + 1)); + WriteBits(1, size); + + value -= (1U << (size - 1)) - 1; + WriteBits(static_cast(value), size - 1); +} + +s32 H264BitWriter::GetFreeBufferBits() { + if (buffer_pos == buffer_size) { + Flush(); + } + + return buffer_size - buffer_pos; +} + +void H264BitWriter::Flush() { + if (buffer_pos == 0) { + return; + } + byte_array.push_back(static_cast(buffer)); + + buffer = 0; + buffer_pos = 0; +} +} // namespace Tegra::Decoder diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h new file mode 100644 index 000000000..21752dd90 --- /dev/null +++ b/src/video_core/command_classes/codecs/h264.h @@ -0,0 +1,130 @@ +// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// + +#pragma once + +#include +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "video_core/command_classes/nvdec_common.h" + +namespace Tegra { +class GPU; +namespace Decoder { + +class H264BitWriter { +public: + H264BitWriter(); + ~H264BitWriter(); + + /// The following Write methods are based on clause 9.1 in the H.264 specification. + /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax + void WriteU(s32 value, s32 value_sz); + void WriteSe(s32 value); + void WriteUe(s32 value); + + /// Finalize the bitstream + void End(); + + /// append a bit to the stream, equivalent value to the state parameter + void WriteBit(bool state); + + /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification + /// Writes the scaling matrices of the stream + void WriteScalingList(const std::vector& list, s32 start, s32 count); + + /// Return the bitstream as a vector. + std::vector& GetByteArray(); + const std::vector& GetByteArray() const; + +private: + // ZigZag LUTs from libavcodec.
+ static constexpr std::array zig_zag_direct{ + 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, + 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, + 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, + }; + + static constexpr std::array zig_zag_scan{ + 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, + 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, + }; + + void WriteBits(s32 value, s32 bit_count); + void WriteExpGolombCodedInt(s32 value); + void WriteExpGolombCodedUInt(u32 value); + s32 GetFreeBufferBits(); + void Flush(); + + s32 buffer_size{8}; + + s32 buffer{}; + s32 buffer_pos{}; + std::vector byte_array; +}; + +class H264 { +public: + explicit H264(GPU& gpu); + ~H264(); + + /// Compose the H264 header of the frame for FFmpeg decoding + std::vector& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, + bool is_first_frame = false); + +private: + struct H264ParameterSet { + u32 log2_max_pic_order_cnt{}; + u32 delta_pic_order_always_zero_flag{}; + u32 frame_mbs_only_flag{}; + u32 pic_width_in_mbs{}; + u32 pic_height_in_map_units{}; + INSERT_PADDING_WORDS(1); + u32 entropy_coding_mode_flag{}; + u32 bottom_field_pic_order_flag{}; + u32 num_refidx_l0_default_active{}; + u32 num_refidx_l1_default_active{}; + u32 deblocking_filter_control_flag{}; + u32 redundant_pic_count_flag{}; + u32 transform_8x8_mode_flag{}; + INSERT_PADDING_WORDS(9); + u64 flags{}; + u32 frame_number{}; + u32 frame_number2{}; + }; + static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size"); + + struct H264DecoderContext { + INSERT_PADDING_BYTES(0x48); + u32 frame_data_size{}; + INSERT_PADDING_BYTES(0xc); + H264ParameterSet h264_parameter_set{}; + INSERT_PADDING_BYTES(0x100); + std::array scaling_matrix_4; + std::array scaling_matrix_8; + }; + static_assert(sizeof(H264DecoderContext) == 
0x2a0, "H264DecoderContext is an invalid size"); + + std::vector frame; + GPU& gpu; +}; + +} // namespace Decoder +} // namespace Tegra diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp new file mode 100644 index 000000000..3bae0bb5d --- /dev/null +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -0,0 +1,1010 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include // for std::memcpy +#include +#include "video_core/command_classes/codecs/vp9.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" + +namespace Tegra::Decoder { + +// Default compressed header probabilities once frame context resets +constexpr Vp9EntropyProbs default_probs{ + .y_mode_prob{ + 65, 32, 18, 144, 162, 194, 41, 51, 98, 132, 68, 18, 165, 217, 196, 45, 40, 78, + 173, 80, 19, 176, 240, 193, 64, 35, 46, 221, 135, 38, 194, 248, 121, 96, 85, 29, + }, + .partition_prob{ + 199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0, + 174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0, + 177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0, + 222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0, + }, + .coef_probs{ + 195, 29, 183, 0, 84, 49, 136, 0, 8, 42, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 31, 107, 169, 0, 35, 99, 159, 0, 17, 82, 140, 0, 8, 66, 114, 0, + 2, 44, 76, 0, 1, 19, 32, 0, 40, 132, 201, 0, 29, 114, 187, 0, 13, 91, 157, 0, + 7, 75, 127, 0, 3, 58, 95, 0, 1, 28, 47, 0, 69, 142, 221, 0, 42, 122, 201, 0, + 15, 91, 159, 0, 6, 67, 121, 0, 1, 42, 77, 0, 1, 17, 31, 0, 102, 148, 228, 0, + 67, 117, 204, 0, 17, 82, 154, 0, 6, 59, 114, 0, 2, 39, 75, 0, 1, 15, 29, 0, + 156, 57, 233, 0, 119, 57, 212, 0, 58, 48, 163, 0, 29, 40, 124, 0, 12, 30, 81, 0, + 3, 12, 31, 0, 191, 107, 226, 0, 124, 117, 204, 0, 25, 99, 155, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 29, 148, 210, 0, 37, 126, 194, 0, 8, 93, 157, 
0, + 2, 68, 118, 0, 1, 39, 69, 0, 1, 17, 33, 0, 41, 151, 213, 0, 27, 123, 193, 0, + 3, 82, 144, 0, 1, 58, 105, 0, 1, 32, 60, 0, 1, 13, 26, 0, 59, 159, 220, 0, + 23, 126, 198, 0, 4, 88, 151, 0, 1, 66, 114, 0, 1, 38, 71, 0, 1, 18, 34, 0, + 114, 136, 232, 0, 51, 114, 207, 0, 11, 83, 155, 0, 3, 56, 105, 0, 1, 33, 65, 0, + 1, 17, 34, 0, 149, 65, 234, 0, 121, 57, 215, 0, 61, 49, 166, 0, 28, 36, 114, 0, + 12, 25, 76, 0, 3, 16, 42, 0, 214, 49, 220, 0, 132, 63, 188, 0, 42, 65, 137, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 137, 221, 0, 104, 131, 216, 0, + 49, 111, 192, 0, 21, 87, 155, 0, 2, 49, 87, 0, 1, 16, 28, 0, 89, 163, 230, 0, + 90, 137, 220, 0, 29, 100, 183, 0, 10, 70, 135, 0, 2, 42, 81, 0, 1, 17, 33, 0, + 108, 167, 237, 0, 55, 133, 222, 0, 15, 97, 179, 0, 4, 72, 135, 0, 1, 45, 85, 0, + 1, 19, 38, 0, 124, 146, 240, 0, 66, 124, 224, 0, 17, 88, 175, 0, 4, 58, 122, 0, + 1, 36, 75, 0, 1, 18, 37, 0, 141, 79, 241, 0, 126, 70, 227, 0, 66, 58, 182, 0, + 30, 44, 136, 0, 12, 34, 96, 0, 2, 20, 47, 0, 229, 99, 249, 0, 143, 111, 235, 0, + 46, 109, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 158, 236, 0, + 94, 146, 224, 0, 25, 117, 191, 0, 9, 87, 149, 0, 3, 56, 99, 0, 1, 33, 57, 0, + 83, 167, 237, 0, 68, 145, 222, 0, 10, 103, 177, 0, 2, 72, 131, 0, 1, 41, 79, 0, + 1, 20, 39, 0, 99, 167, 239, 0, 47, 141, 224, 0, 10, 104, 178, 0, 2, 73, 133, 0, + 1, 44, 85, 0, 1, 22, 47, 0, 127, 145, 243, 0, 71, 129, 228, 0, 17, 93, 177, 0, + 3, 61, 124, 0, 1, 41, 84, 0, 1, 21, 52, 0, 157, 78, 244, 0, 140, 72, 231, 0, + 69, 58, 184, 0, 31, 44, 137, 0, 14, 38, 105, 0, 8, 23, 61, 0, 125, 34, 187, 0, + 52, 41, 133, 0, 6, 31, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 37, 109, 153, 0, 51, 102, 147, 0, 23, 87, 128, 0, 8, 67, 101, 0, 1, 41, 63, 0, + 1, 19, 29, 0, 31, 154, 185, 0, 17, 127, 175, 0, 6, 96, 145, 0, 2, 73, 114, 0, + 1, 51, 82, 0, 1, 28, 45, 0, 23, 163, 200, 0, 10, 131, 185, 0, 2, 93, 148, 0, + 1, 67, 111, 0, 1, 41, 69, 0, 1, 14, 24, 0, 29, 176, 217, 0, 12, 145, 201, 0, + 3, 101, 156, 
0, 1, 69, 111, 0, 1, 39, 63, 0, 1, 14, 23, 0, 57, 192, 233, 0, + 25, 154, 215, 0, 6, 109, 167, 0, 3, 78, 118, 0, 1, 48, 69, 0, 1, 21, 29, 0, + 202, 105, 245, 0, 108, 106, 216, 0, 18, 90, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 33, 172, 219, 0, 64, 149, 206, 0, 14, 117, 177, 0, 5, 90, 141, 0, + 2, 61, 95, 0, 1, 37, 57, 0, 33, 179, 220, 0, 11, 140, 198, 0, 1, 89, 148, 0, + 1, 60, 104, 0, 1, 33, 57, 0, 1, 12, 21, 0, 30, 181, 221, 0, 8, 141, 198, 0, + 1, 87, 145, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 20, 0, 32, 186, 224, 0, + 7, 142, 198, 0, 1, 86, 143, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 22, 0, + 57, 192, 227, 0, 20, 143, 204, 0, 3, 96, 154, 0, 1, 68, 112, 0, 1, 42, 69, 0, + 1, 19, 32, 0, 212, 35, 215, 0, 113, 47, 169, 0, 29, 48, 105, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 74, 129, 203, 0, 106, 120, 203, 0, 49, 107, 178, 0, + 19, 84, 144, 0, 4, 50, 84, 0, 1, 15, 25, 0, 71, 172, 217, 0, 44, 141, 209, 0, + 15, 102, 173, 0, 6, 76, 133, 0, 2, 51, 89, 0, 1, 24, 42, 0, 64, 185, 231, 0, + 31, 148, 216, 0, 8, 103, 175, 0, 3, 74, 131, 0, 1, 46, 81, 0, 1, 18, 30, 0, + 65, 196, 235, 0, 25, 157, 221, 0, 5, 105, 174, 0, 1, 67, 120, 0, 1, 38, 69, 0, + 1, 15, 30, 0, 65, 204, 238, 0, 30, 156, 224, 0, 7, 107, 177, 0, 2, 70, 124, 0, + 1, 42, 73, 0, 1, 18, 34, 0, 225, 86, 251, 0, 144, 104, 235, 0, 42, 99, 181, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 175, 239, 0, 112, 165, 229, 0, + 29, 136, 200, 0, 12, 103, 162, 0, 6, 77, 123, 0, 2, 53, 84, 0, 75, 183, 239, 0, + 30, 155, 221, 0, 3, 106, 171, 0, 1, 74, 128, 0, 1, 44, 76, 0, 1, 17, 28, 0, + 73, 185, 240, 0, 27, 159, 222, 0, 2, 107, 172, 0, 1, 75, 127, 0, 1, 42, 73, 0, + 1, 17, 29, 0, 62, 190, 238, 0, 21, 159, 222, 0, 2, 107, 172, 0, 1, 72, 122, 0, + 1, 40, 71, 0, 1, 18, 32, 0, 61, 199, 240, 0, 27, 161, 226, 0, 4, 113, 180, 0, + 1, 76, 129, 0, 1, 46, 80, 0, 1, 23, 41, 0, 7, 27, 153, 0, 5, 30, 95, 0, + 1, 16, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 75, 127, 0, + 57, 75, 124, 0, 27, 67, 108, 0, 10, 54, 86, 0, 1, 33, 
52, 0, 1, 12, 18, 0, + 43, 125, 151, 0, 26, 108, 148, 0, 7, 83, 122, 0, 2, 59, 89, 0, 1, 38, 60, 0, + 1, 17, 27, 0, 23, 144, 163, 0, 13, 112, 154, 0, 2, 75, 117, 0, 1, 50, 81, 0, + 1, 31, 51, 0, 1, 14, 23, 0, 18, 162, 185, 0, 6, 123, 171, 0, 1, 78, 125, 0, + 1, 51, 86, 0, 1, 31, 54, 0, 1, 14, 23, 0, 15, 199, 227, 0, 3, 150, 204, 0, + 1, 91, 146, 0, 1, 55, 95, 0, 1, 30, 53, 0, 1, 11, 20, 0, 19, 55, 240, 0, + 19, 59, 196, 0, 3, 52, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 41, 166, 207, 0, 104, 153, 199, 0, 31, 123, 181, 0, 14, 101, 152, 0, 5, 72, 106, 0, + 1, 36, 52, 0, 35, 176, 211, 0, 12, 131, 190, 0, 2, 88, 144, 0, 1, 60, 101, 0, + 1, 36, 60, 0, 1, 16, 28, 0, 28, 183, 213, 0, 8, 134, 191, 0, 1, 86, 142, 0, + 1, 56, 96, 0, 1, 30, 53, 0, 1, 12, 20, 0, 20, 190, 215, 0, 4, 135, 192, 0, + 1, 84, 139, 0, 1, 53, 91, 0, 1, 28, 49, 0, 1, 11, 20, 0, 13, 196, 216, 0, + 2, 137, 192, 0, 1, 86, 143, 0, 1, 57, 99, 0, 1, 32, 56, 0, 1, 13, 24, 0, + 211, 29, 217, 0, 96, 47, 156, 0, 22, 43, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 78, 120, 193, 0, 111, 116, 186, 0, 46, 102, 164, 0, 15, 80, 128, 0, + 2, 49, 76, 0, 1, 18, 28, 0, 71, 161, 203, 0, 42, 132, 192, 0, 10, 98, 150, 0, + 3, 69, 109, 0, 1, 44, 70, 0, 1, 18, 29, 0, 57, 186, 211, 0, 30, 140, 196, 0, + 4, 93, 146, 0, 1, 62, 102, 0, 1, 38, 65, 0, 1, 16, 27, 0, 47, 199, 217, 0, + 14, 145, 196, 0, 1, 88, 142, 0, 1, 57, 98, 0, 1, 36, 62, 0, 1, 15, 26, 0, + 26, 219, 229, 0, 5, 155, 207, 0, 1, 94, 151, 0, 1, 60, 104, 0, 1, 36, 62, 0, + 1, 16, 28, 0, 233, 29, 248, 0, 146, 47, 220, 0, 43, 52, 140, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 100, 163, 232, 0, 179, 161, 222, 0, 63, 142, 204, 0, + 37, 113, 174, 0, 26, 89, 137, 0, 18, 68, 97, 0, 85, 181, 230, 0, 32, 146, 209, 0, + 7, 100, 164, 0, 3, 71, 121, 0, 1, 45, 77, 0, 1, 18, 30, 0, 65, 187, 230, 0, + 20, 148, 207, 0, 2, 97, 159, 0, 1, 68, 116, 0, 1, 40, 70, 0, 1, 14, 29, 0, + 40, 194, 227, 0, 8, 147, 204, 0, 1, 94, 155, 0, 1, 65, 112, 0, 1, 39, 66, 0, + 1, 14, 26, 0, 16, 208, 
228, 0, 3, 151, 207, 0, 1, 98, 160, 0, 1, 67, 117, 0, + 1, 41, 74, 0, 1, 17, 31, 0, 17, 38, 140, 0, 7, 34, 80, 0, 1, 17, 29, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 75, 128, 0, 41, 76, 128, 0, + 26, 66, 116, 0, 12, 52, 94, 0, 2, 32, 55, 0, 1, 10, 16, 0, 50, 127, 154, 0, + 37, 109, 152, 0, 16, 82, 121, 0, 5, 59, 85, 0, 1, 35, 54, 0, 1, 13, 20, 0, + 40, 142, 167, 0, 17, 110, 157, 0, 2, 71, 112, 0, 1, 44, 72, 0, 1, 27, 45, 0, + 1, 11, 17, 0, 30, 175, 188, 0, 9, 124, 169, 0, 1, 74, 116, 0, 1, 48, 78, 0, + 1, 30, 49, 0, 1, 11, 18, 0, 10, 222, 223, 0, 2, 150, 194, 0, 1, 83, 128, 0, + 1, 48, 79, 0, 1, 27, 45, 0, 1, 11, 17, 0, 36, 41, 235, 0, 29, 36, 193, 0, + 10, 27, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 165, 222, 0, + 177, 162, 215, 0, 110, 135, 195, 0, 57, 113, 168, 0, 23, 83, 120, 0, 10, 49, 61, 0, + 85, 190, 223, 0, 36, 139, 200, 0, 5, 90, 146, 0, 1, 60, 103, 0, 1, 38, 65, 0, + 1, 18, 30, 0, 72, 202, 223, 0, 23, 141, 199, 0, 2, 86, 140, 0, 1, 56, 97, 0, + 1, 36, 61, 0, 1, 16, 27, 0, 55, 218, 225, 0, 13, 145, 200, 0, 1, 86, 141, 0, + 1, 57, 99, 0, 1, 35, 61, 0, 1, 13, 22, 0, 15, 235, 212, 0, 1, 132, 184, 0, + 1, 84, 139, 0, 1, 57, 97, 0, 1, 34, 56, 0, 1, 14, 23, 0, 181, 21, 201, 0, + 61, 37, 123, 0, 10, 38, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 47, 106, 172, 0, 95, 104, 173, 0, 42, 93, 159, 0, 18, 77, 131, 0, 4, 50, 81, 0, + 1, 17, 23, 0, 62, 147, 199, 0, 44, 130, 189, 0, 28, 102, 154, 0, 18, 75, 115, 0, + 2, 44, 65, 0, 1, 12, 19, 0, 55, 153, 210, 0, 24, 130, 194, 0, 3, 93, 146, 0, + 1, 61, 97, 0, 1, 31, 50, 0, 1, 10, 16, 0, 49, 186, 223, 0, 17, 148, 204, 0, + 1, 96, 142, 0, 1, 53, 83, 0, 1, 26, 44, 0, 1, 11, 17, 0, 13, 217, 212, 0, + 2, 136, 180, 0, 1, 78, 124, 0, 1, 50, 83, 0, 1, 29, 49, 0, 1, 14, 23, 0, + 197, 13, 247, 0, 82, 17, 222, 0, 25, 17, 162, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 126, 186, 247, 0, 234, 191, 243, 0, 176, 177, 234, 0, 104, 158, 220, 0, + 66, 128, 186, 0, 55, 90, 137, 0, 111, 197, 242, 0, 46, 158, 219, 0, 9, 104, 171, 
0, + 2, 65, 125, 0, 1, 44, 80, 0, 1, 17, 91, 0, 104, 208, 245, 0, 39, 168, 224, 0, + 3, 109, 162, 0, 1, 79, 124, 0, 1, 50, 102, 0, 1, 43, 102, 0, 84, 220, 246, 0, + 31, 177, 231, 0, 2, 115, 180, 0, 1, 79, 134, 0, 1, 55, 77, 0, 1, 60, 79, 0, + 43, 243, 240, 0, 8, 180, 217, 0, 1, 115, 166, 0, 1, 84, 121, 0, 1, 51, 67, 0, + 1, 16, 6, 0, + }, + .switchable_interp_prob{235, 162, 36, 255, 34, 3, 149, 144}, + .inter_mode_prob{ + 2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94, + 66, 0, 8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0, + }, + .intra_inter_prob{9, 102, 187, 225}, + .comp_inter_prob{9, 102, 187, 225, 0}, + .single_ref_prob{33, 16, 77, 74, 142, 142, 172, 170, 238, 247}, + .comp_ref_prob{50, 126, 123, 221, 226}, + .tx_32x32_prob{3, 136, 37, 5, 52, 13}, + .tx_16x16_prob{20, 152, 15, 101}, + .tx_8x8_prob{100, 66}, + .skip_probs{192, 128, 64}, + .joints{32, 64, 96}, + .sign{128, 128}, + .classes{ + 224, 144, 192, 168, 192, 176, 192, 198, 198, 245, + 216, 128, 176, 160, 176, 176, 192, 198, 198, 208, + }, + .class_0{216, 208}, + .prob_bits{ + 136, 140, 148, 160, 176, 192, 224, 234, 234, 240, + 136, 140, 148, 160, 176, 192, 224, 234, 234, 240, + }, + .class_0_fr{128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64}, + .fr{64, 96, 64, 64, 96, 64}, + .class_0_hp{160, 160}, + .high_precision{128, 128}, +}; + +VP9::VP9(GPU& gpu) : gpu(gpu) {} + +VP9::~VP9() = default; + +void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) { + const bool update = new_prob != old_prob; + + writer.Write(update, diff_update_probability); + + if (update) { + WriteProbabilityDelta(writer, new_prob, old_prob); + } +} +template +void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array& new_prob, + const std::array& old_prob) { + for (std::size_t offset = 0; offset < new_prob.size(); ++offset) { + WriteProbabilityUpdate(writer, new_prob[offset], old_prob[offset]); + } +} + +template +void VP9::WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, 
const std::array& new_prob, + const std::array& old_prob) { + for (std::size_t offset = 0; offset < new_prob.size(); offset += 4) { + WriteProbabilityUpdate(writer, new_prob[offset + 0], old_prob[offset + 0]); + WriteProbabilityUpdate(writer, new_prob[offset + 1], old_prob[offset + 1]); + WriteProbabilityUpdate(writer, new_prob[offset + 2], old_prob[offset + 2]); + } +} + +void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) { + const int delta = RemapProbability(new_prob, old_prob); + + EncodeTermSubExp(writer, delta); +} + +s32 VP9::RemapProbability(s32 new_prob, s32 old_prob) { + new_prob--; + old_prob--; + + std::size_t index{}; + + if (old_prob * 2 <= 0xff) { + index = static_cast(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1)); + } else { + index = static_cast( + std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1)); + } + + return map_lut[index]; +} + +s32 VP9::RecenterNonNeg(s32 new_prob, s32 old_prob) { + if (new_prob > old_prob * 2) { + return new_prob; + } else if (new_prob >= old_prob) { + return (new_prob - old_prob) * 2; + } else { + return (old_prob - new_prob) * 2 - 1; + } +} + +void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) { + if (WriteLessThan(writer, value, 16)) { + writer.Write(value, 4); + } else if (WriteLessThan(writer, value, 32)) { + writer.Write(value - 16, 4); + } else if (WriteLessThan(writer, value, 64)) { + writer.Write(value - 32, 5); + } else { + value -= 64; + + constexpr s32 size = 8; + + const s32 mask = (1 << size) - 191; + + const s32 delta = value - mask; + + if (delta < 0) { + writer.Write(value, size - 1); + } else { + writer.Write(delta / 2 + mask, size - 1); + writer.Write(delta & 1, 1); + } + } +} + +bool VP9::WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test) { + const bool is_lt = value < test; + writer.Write(!is_lt); + return is_lt; +} + +void VP9::WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode, + const std::array& 
new_prob, + const std::array& old_prob) { + // Note: There's 1 byte added on each packet for alignment, + // this byte is ignored when doing updates. + constexpr s32 block_bytes = 2 * 2 * 6 * 6 * 4; + + const auto needs_update = [&](s32 base_index) -> bool { + s32 index = base_index; + for (s32 i = 0; i < 2; i++) { + for (s32 j = 0; j < 2; j++) { + for (s32 k = 0; k < 6; k++) { + for (s32 l = 0; l < 6; l++) { + if (new_prob[index + 0] != old_prob[index + 0] || + new_prob[index + 1] != old_prob[index + 1] || + new_prob[index + 2] != old_prob[index + 2]) { + return true; + } + + index += 4; + } + } + } + } + return false; + }; + + for (s32 block_index = 0; block_index < 4; block_index++) { + const s32 base_index = block_index * block_bytes; + const bool update = needs_update(base_index); + writer.Write(update); + + if (update) { + s32 index = base_index; + for (s32 i = 0; i < 2; i++) { + for (s32 j = 0; j < 2; j++) { + for (s32 k = 0; k < 6; k++) { + for (s32 l = 0; l < 6; l++) { + if (k != 0 || l < 3) { + WriteProbabilityUpdate(writer, new_prob[index + 0], + old_prob[index + 0]); + WriteProbabilityUpdate(writer, new_prob[index + 1], + old_prob[index + 1]); + WriteProbabilityUpdate(writer, new_prob[index + 2], + old_prob[index + 2]); + } + index += 4; + } + } + } + } + } + + if (block_index == tx_mode) { + break; + } + } +} + +void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) { + const bool update = new_prob != old_prob; + writer.Write(update, diff_update_probability); + + if (update) { + writer.Write(new_prob >> 1, 7); + } +} + +s32 VP9::CalcMinLog2TileCols(s32 frame_width) { + const s32 sb64_cols = (frame_width + 63) / 64; + s32 min_log2 = 0; + + while ((64 << min_log2) < sb64_cols) { + min_log2++; + } + + return min_log2; +} + +s32 VP9::CalcMaxLog2TileCols(s32 frameWidth) { + const s32 sb64_cols = (frameWidth + 63) / 64; + s32 max_log2 = 1; + + while ((sb64_cols >> max_log2) >= 4) { + max_log2++; + } + + return max_log2 - 1; +} 
+ +Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { + PictureInfo picture_info{}; + gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); + Vp9PictureInfo vp9_info = picture_info.Convert(); + + InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy); + + // surface_luma_offset[0:3] contains the address of the reference frame offsets in the following + // order: last, golden, altref, current. It may be worthwhile to track the updates done here + // to avoid buffering frame data needed for reference frame updating in the header composition. + std::memcpy(vp9_info.frame_offsets.data(), state.surface_luma_offset.data(), 4 * sizeof(u64)); + + return std::move(vp9_info); +} + +void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { + EntropyProbs entropy{}; + gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); + entropy.Convert(dst); +} + +Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) { + Vp9FrameContainer frame{}; + { + gpu.SyncGuestHost(); + frame.info = std::move(GetVp9PictureInfo(state)); + + frame.bit_stream.resize(frame.info.bitstream_size); + gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(), + frame.info.bitstream_size); + } + // Buffer two frames, saving the last show frame info + if (next_next_frame.bit_stream.size() != 0) { + Vp9FrameContainer temp{ + .info = frame.info, + .bit_stream = frame.bit_stream, + }; + next_next_frame.info.show_frame = frame.info.last_frame_shown; + frame.info = next_next_frame.info; + frame.bit_stream = next_next_frame.bit_stream; + next_next_frame = std::move(temp); + + if (next_frame.bit_stream.size() != 0) { + Vp9FrameContainer temp{ + .info = frame.info, + .bit_stream = frame.bit_stream, + }; + next_frame.info.show_frame = frame.info.last_frame_shown; + frame.info = next_frame.info; + frame.bit_stream = next_frame.bit_stream; + next_frame = std::move(temp); + 
} else { + next_frame.info = frame.info; + next_frame.bit_stream = frame.bit_stream; + } + } else { + next_next_frame.info = frame.info; + next_next_frame.bit_stream = frame.bit_stream; + } + return frame; +} + +std::vector VP9::ComposeCompressedHeader() { + VpxRangeEncoder writer{}; + + if (!current_frame_info.lossless) { + if (static_cast(current_frame_info.transform_mode) >= 3) { + writer.Write(3, 2); + writer.Write(current_frame_info.transform_mode == 4); + } else { + writer.Write(current_frame_info.transform_mode, 2); + } + } + + if (current_frame_info.transform_mode == 4) { + // tx_mode_probs() in the spec + WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_8x8_prob, + prev_frame_probs.tx_8x8_prob); + WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_16x16_prob, + prev_frame_probs.tx_16x16_prob); + WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_32x32_prob, + prev_frame_probs.tx_32x32_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.tx_8x8_prob = current_frame_info.entropy.tx_8x8_prob; + prev_frame_probs.tx_16x16_prob = current_frame_info.entropy.tx_16x16_prob; + prev_frame_probs.tx_32x32_prob = current_frame_info.entropy.tx_32x32_prob; + } + } + // read_coef_probs() in the spec + WriteCoefProbabilityUpdate(writer, current_frame_info.transform_mode, + current_frame_info.entropy.coef_probs, prev_frame_probs.coef_probs); + // read_skip_probs() in the spec + WriteProbabilityUpdate(writer, current_frame_info.entropy.skip_probs, + prev_frame_probs.skip_probs); + + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.coef_probs = current_frame_info.entropy.coef_probs; + prev_frame_probs.skip_probs = current_frame_info.entropy.skip_probs; + } + + if (!current_frame_info.intra_only) { + // read_inter_probs() in the spec + WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.inter_mode_prob, + prev_frame_probs.inter_mode_prob); + 
if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.inter_mode_prob = current_frame_info.entropy.inter_mode_prob; + } + + if (current_frame_info.interp_filter == 4) { + // read_interp_filter_probs() in the spec + WriteProbabilityUpdate(writer, current_frame_info.entropy.switchable_interp_prob, + prev_frame_probs.switchable_interp_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.switchable_interp_prob = + current_frame_info.entropy.switchable_interp_prob; + } + } + + // read_is_inter_probs() in the spec + WriteProbabilityUpdate(writer, current_frame_info.entropy.intra_inter_prob, + prev_frame_probs.intra_inter_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.intra_inter_prob = current_frame_info.entropy.intra_inter_prob; + } + // frame_reference_mode() in the spec + if ((current_frame_info.ref_frame_sign_bias[1] & 1) != + (current_frame_info.ref_frame_sign_bias[2] & 1) || + (current_frame_info.ref_frame_sign_bias[1] & 1) != + (current_frame_info.ref_frame_sign_bias[3] & 1)) { + if (current_frame_info.reference_mode >= 1) { + writer.Write(1, 1); + writer.Write(current_frame_info.reference_mode == 2); + } else { + writer.Write(0, 1); + } + } + + // frame_reference_mode_probs() in the spec + if (current_frame_info.reference_mode == 2) { + WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_inter_prob, + prev_frame_probs.comp_inter_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.comp_inter_prob = current_frame_info.entropy.comp_inter_prob; + } + } + + if (current_frame_info.reference_mode != 1) { + WriteProbabilityUpdate(writer, current_frame_info.entropy.single_ref_prob, + prev_frame_probs.single_ref_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.single_ref_prob = current_frame_info.entropy.single_ref_prob; + } 
+ } + + if (current_frame_info.reference_mode != 0) { + WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_ref_prob, + prev_frame_probs.comp_ref_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.comp_ref_prob = current_frame_info.entropy.comp_ref_prob; + } + } + + // read_y_mode_probs + for (std::size_t index = 0; index < current_frame_info.entropy.y_mode_prob.size(); + ++index) { + WriteProbabilityUpdate(writer, current_frame_info.entropy.y_mode_prob[index], + prev_frame_probs.y_mode_prob[index]); + } + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.y_mode_prob = current_frame_info.entropy.y_mode_prob; + } + // read_partition_probs + WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.partition_prob, + prev_frame_probs.partition_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.partition_prob = current_frame_info.entropy.partition_prob; + } + + // mv_probs + for (s32 i = 0; i < 3; i++) { + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.joints[i], + prev_frame_probs.joints[i]); + } + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.joints = current_frame_info.entropy.joints; + } + + for (s32 i = 0; i < 2; i++) { + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.sign[i], + prev_frame_probs.sign[i]); + + for (s32 j = 0; j < 10; j++) { + const int index = i * 10 + j; + + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.classes[index], + prev_frame_probs.classes[index]); + } + + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0[i], + prev_frame_probs.class_0[i]); + + for (s32 j = 0; j < 10; j++) { + const int index = i * 10 + j; + + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.prob_bits[index], + prev_frame_probs.prob_bits[index]); + } + } + + for (s32 i = 0; i < 2; i++) { + for (s32 j = 
0; j < 2; j++) { + for (s32 k = 0; k < 3; k++) { + const int index = i * 2 * 3 + j * 3 + k; + + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_fr[index], + prev_frame_probs.class_0_fr[index]); + } + } + + for (s32 j = 0; j < 3; j++) { + const int index = i * 3 + j; + + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.fr[index], + prev_frame_probs.fr[index]); + } + } + + if (current_frame_info.allow_high_precision_mv) { + for (s32 index = 0; index < 2; index++) { + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_hp[index], + prev_frame_probs.class_0_hp[index]); + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.high_precision[index], + prev_frame_probs.high_precision[index]); + } + } + + // save previous probs + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.sign = current_frame_info.entropy.sign; + prev_frame_probs.classes = current_frame_info.entropy.classes; + prev_frame_probs.class_0 = current_frame_info.entropy.class_0; + prev_frame_probs.prob_bits = current_frame_info.entropy.prob_bits; + prev_frame_probs.class_0_fr = current_frame_info.entropy.class_0_fr; + prev_frame_probs.fr = current_frame_info.entropy.fr; + prev_frame_probs.class_0_hp = current_frame_info.entropy.class_0_hp; + prev_frame_probs.high_precision = current_frame_info.entropy.high_precision; + } + } + + writer.End(); + return writer.GetBuffer(); + + const auto writer_bytearray = writer.GetBuffer(); + + std::vector compressed_header(writer_bytearray.size()); + std::memcpy(compressed_header.data(), writer_bytearray.data(), writer_bytearray.size()); + return compressed_header; +} + +VpxBitStreamWriter VP9::ComposeUncompressedHeader() { + VpxBitStreamWriter uncomp_writer{}; + + uncomp_writer.WriteU(2, 2); // Frame marker. + uncomp_writer.WriteU(0, 2); // Profile. + uncomp_writer.WriteBit(false); // Show existing frame. 
+ uncomp_writer.WriteBit(!current_frame_info.is_key_frame); // is key frame? + uncomp_writer.WriteBit(current_frame_info.show_frame); // show frame? + uncomp_writer.WriteBit(current_frame_info.error_resilient_mode); // error reslience + + if (current_frame_info.is_key_frame) { + uncomp_writer.WriteU(frame_sync_code, 24); + uncomp_writer.WriteU(0, 3); // Color space. + uncomp_writer.WriteU(0, 1); // Color range. + uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16); + uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16); + uncomp_writer.WriteBit(false); // Render and frame size different. + + // Reset context + prev_frame_probs = default_probs; + swap_next_golden = false; + loop_filter_ref_deltas.fill(0); + loop_filter_mode_deltas.fill(0); + + // allow frames offsets to stabilize before checking for golden frames + grace_period = 4; + + // On key frames, all frame slots are set to the current frame, + // so the value of the selected slot doesn't really matter. + frame_ctxs.fill({current_frame_number, false, default_probs}); + + // intra only, meaning the frame can be recreated with no other references + current_frame_info.intra_only = true; + + } else { + std::array ref_frame_index; + + if (!current_frame_info.show_frame) { + uncomp_writer.WriteBit(current_frame_info.intra_only); + if (!current_frame_info.last_frame_was_key) { + swap_next_golden = !swap_next_golden; + } + } else { + current_frame_info.intra_only = false; + } + if (!current_frame_info.error_resilient_mode) { + uncomp_writer.WriteU(0, 2); // Reset frame context. 
+ } + + // Last, Golden, Altref frames + ref_frame_index = std::array{0, 1, 2}; + + // set when next frame is hidden + // altref and golden references are swapped + if (swap_next_golden) { + ref_frame_index = std::array{0, 2, 1}; + } + + // update Last Frame + u64 refresh_frame_flags = 1; + + // golden frame may refresh, determined if the next golden frame offset is changed + bool golden_refresh = false; + if (grace_period <= 0) { + for (s32 index = 1; index < 3; ++index) { + if (current_frame_info.frame_offsets[index] != + next_frame.info.frame_offsets[index]) { + current_frame_info.refresh_frame[index] = true; + golden_refresh = true; + grace_period = 3; + } + } + } + + if (current_frame_info.show_frame && + (!next_frame.info.show_frame || next_frame.info.is_key_frame)) { + // Update golden frame + refresh_frame_flags = swap_next_golden ? 2 : 4; + } + + if (!current_frame_info.show_frame) { + // Update altref + refresh_frame_flags = swap_next_golden ? 2 : 4; + } else if (golden_refresh) { + refresh_frame_flags = 3; + } + + if (current_frame_info.intra_only) { + uncomp_writer.WriteU(frame_sync_code, 24); + uncomp_writer.WriteU(static_cast(refresh_frame_flags), 8); + uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16); + uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16); + uncomp_writer.WriteBit(false); // Render and frame size different. + } else { + uncomp_writer.WriteU(static_cast(refresh_frame_flags), 8); + + for (s32 index = 1; index < 4; index++) { + uncomp_writer.WriteU(ref_frame_index[index - 1], 3); + uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1); + } + + uncomp_writer.WriteBit(true); // Frame size with refs. + uncomp_writer.WriteBit(false); // Render and frame size different. 
+ uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv); + uncomp_writer.WriteBit(current_frame_info.interp_filter == 4); + + if (current_frame_info.interp_filter != 4) { + uncomp_writer.WriteU(current_frame_info.interp_filter, 2); + } + } + } + + if (!current_frame_info.error_resilient_mode) { + uncomp_writer.WriteBit(true); // Refresh frame context. where do i get this info from? + uncomp_writer.WriteBit(true); // Frame parallel decoding mode. + } + + int frame_ctx_idx = 0; + if (!current_frame_info.show_frame) { + frame_ctx_idx = 1; + } + + uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index. + prev_frame_probs = + frame_ctxs[frame_ctx_idx].probs; // reference probabilities for compressed header + frame_ctxs[frame_ctx_idx] = {current_frame_number, false, current_frame_info.entropy}; + + uncomp_writer.WriteU(current_frame_info.first_level, 6); + uncomp_writer.WriteU(current_frame_info.sharpness_level, 3); + uncomp_writer.WriteBit(current_frame_info.mode_ref_delta_enabled); + + if (current_frame_info.mode_ref_delta_enabled) { + // check if ref deltas are different, update accordingly + std::array update_loop_filter_ref_deltas; + std::array update_loop_filter_mode_deltas; + + bool loop_filter_delta_update = false; + + for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) { + const s8 old_deltas = loop_filter_ref_deltas[index]; + const s8 new_deltas = current_frame_info.ref_deltas[index]; + + loop_filter_delta_update |= + (update_loop_filter_ref_deltas[index] = old_deltas != new_deltas); + } + + for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) { + const s8 old_deltas = loop_filter_mode_deltas[index]; + const s8 new_deltas = current_frame_info.mode_deltas[index]; + + loop_filter_delta_update |= + (update_loop_filter_mode_deltas[index] = old_deltas != new_deltas); + } + + uncomp_writer.WriteBit(loop_filter_delta_update); + + if (loop_filter_delta_update) { + for (std::size_t index 
= 0; index < current_frame_info.ref_deltas.size(); index++) { + uncomp_writer.WriteBit(update_loop_filter_ref_deltas[index]); + + if (update_loop_filter_ref_deltas[index]) { + uncomp_writer.WriteS(current_frame_info.ref_deltas[index], 6); + } + } + + for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) { + uncomp_writer.WriteBit(update_loop_filter_mode_deltas[index]); + + if (update_loop_filter_mode_deltas[index]) { + uncomp_writer.WriteS(current_frame_info.mode_deltas[index], 6); + } + } + // save new deltas + loop_filter_ref_deltas = current_frame_info.ref_deltas; + loop_filter_mode_deltas = current_frame_info.mode_deltas; + } + } + + uncomp_writer.WriteU(current_frame_info.base_q_index, 8); + + uncomp_writer.WriteDeltaQ(current_frame_info.y_dc_delta_q); + uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q); + uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q); + + uncomp_writer.WriteBit(false); // Segmentation enabled (TODO). + + const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width); + const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width); + + const s32 tile_cols_log2_diff = current_frame_info.log2_tile_cols - min_tile_cols_log2; + const s32 tile_cols_log2_inc_mask = (1 << tile_cols_log2_diff) - 1; + + // If it's less than the maximum, we need to add an extra 0 on the bitstream + // to indicate that it should stop reading. 
+ if (current_frame_info.log2_tile_cols < max_tile_cols_log2) { + uncomp_writer.WriteU(tile_cols_log2_inc_mask << 1, tile_cols_log2_diff + 1); + } else { + uncomp_writer.WriteU(tile_cols_log2_inc_mask, tile_cols_log2_diff); + } + + const bool tile_rows_log2_is_nonzero = current_frame_info.log2_tile_rows != 0; + + uncomp_writer.WriteBit(tile_rows_log2_is_nonzero); + + if (tile_rows_log2_is_nonzero) { + uncomp_writer.WriteBit(current_frame_info.log2_tile_rows > 1); + } + + return uncomp_writer; +} + +std::vector& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) { + std::vector bitstream; + { + Vp9FrameContainer curr_frame = GetCurrentFrame(state); + current_frame_info = curr_frame.info; + bitstream = curr_frame.bit_stream; + } + + // The uncompressed header routine sets PrevProb parameters needed for the compressed header + auto uncomp_writer = ComposeUncompressedHeader(); + std::vector compressed_header = ComposeCompressedHeader(); + + uncomp_writer.WriteU(static_cast(compressed_header.size()), 16); + uncomp_writer.Flush(); + std::vector uncompressed_header = uncomp_writer.GetByteArray(); + + // Write headers and frame to buffer + frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size()); + std::memcpy(frame.data(), uncompressed_header.data(), uncompressed_header.size()); + std::memcpy(frame.data() + uncompressed_header.size(), compressed_header.data(), + compressed_header.size()); + std::memcpy(frame.data() + uncompressed_header.size() + compressed_header.size(), + bitstream.data(), bitstream.size()); + + // keep track of frame number + current_frame_number++; + grace_period--; + + // don't display hidden frames + hidden = !current_frame_info.show_frame; + return frame; +} + +VpxRangeEncoder::VpxRangeEncoder() { + Write(false); +} + +VpxRangeEncoder::~VpxRangeEncoder() = default; + +void VpxRangeEncoder::Write(s32 value, s32 value_size) { + for (s32 bit = value_size - 1; bit >= 0; bit--) { + Write(((value >> bit) & 1) != 0); 
+ } +} + +void VpxRangeEncoder::Write(bool bit) { + Write(bit, half_probability); +} + +void VpxRangeEncoder::Write(bool bit, s32 probability) { + u32 local_range = range; + const u32 split = 1 + (((local_range - 1) * static_cast(probability)) >> 8); + local_range = split; + + if (bit) { + low_value += split; + local_range = range - split; + } + + s32 shift = norm_lut[local_range]; + local_range <<= shift; + count += shift; + + if (count >= 0) { + const s32 offset = shift - count; + + if (((low_value << (offset - 1)) >> 31) != 0) { + const s32 current_pos = static_cast(base_stream.GetPosition()); + base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos); + while (base_stream.GetPosition() >= 0 && PeekByte() == 0xff) { + base_stream.WriteByte(0); + + base_stream.Seek(-2, Common::SeekOrigin::FromCurrentPos); + } + base_stream.WriteByte(static_cast((PeekByte() + 1))); + base_stream.Seek(current_pos, Common::SeekOrigin::SetOrigin); + } + base_stream.WriteByte(static_cast((low_value >> (24 - offset)))); + + low_value <<= offset; + shift = count; + low_value &= 0xffffff; + count -= 8; + } + + low_value <<= shift; + range = local_range; +} + +void VpxRangeEncoder::End() { + for (std::size_t index = 0; index < 32; ++index) { + Write(false); + } +} + +u8 VpxRangeEncoder::PeekByte() { + const u8 value = base_stream.ReadByte(); + base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos); + + return value; +} + +VpxBitStreamWriter::VpxBitStreamWriter() = default; + +VpxBitStreamWriter::~VpxBitStreamWriter() = default; + +void VpxBitStreamWriter::WriteU(u32 value, u32 value_size) { + WriteBits(value, value_size); +} + +void VpxBitStreamWriter::WriteS(s32 value, u32 value_size) { + const bool sign = value < 0; + if (sign) { + value = -value; + } + + WriteBits(static_cast(value << 1) | (sign ? 
1 : 0), value_size + 1); +} + +void VpxBitStreamWriter::WriteDeltaQ(u32 value) { + const bool delta_coded = value != 0; + WriteBit(delta_coded); + + if (delta_coded) { + WriteBits(value, 4); + } +} + +void VpxBitStreamWriter::WriteBits(u32 value, u32 bit_count) { + s32 value_pos = 0; + s32 remaining = bit_count; + + while (remaining > 0) { + s32 copy_size = remaining; + + const s32 free = GetFreeBufferBits(); + + if (copy_size > free) { + copy_size = free; + } + + const s32 mask = (1 << copy_size) - 1; + + const s32 src_shift = (bit_count - value_pos) - copy_size; + const s32 dst_shift = (buffer_size - buffer_pos) - copy_size; + + buffer |= ((value >> src_shift) & mask) << dst_shift; + + value_pos += copy_size; + buffer_pos += copy_size; + remaining -= copy_size; + } +} + +void VpxBitStreamWriter::WriteBit(bool state) { + WriteBits(state ? 1 : 0, 1); +} + +s32 VpxBitStreamWriter::GetFreeBufferBits() { + if (buffer_pos == buffer_size) { + Flush(); + } + + return buffer_size - buffer_pos; +} + +void VpxBitStreamWriter::Flush() { + if (buffer_pos == 0) { + return; + } + byte_array.push_back(static_cast(buffer)); + buffer = 0; + buffer_pos = 0; +} + +std::vector& VpxBitStreamWriter::GetByteArray() { + return byte_array; +} + +const std::vector& VpxBitStreamWriter::GetByteArray() const { + return byte_array; +} + +} // namespace Tegra::Decoder diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h new file mode 100644 index 000000000..748e11bae --- /dev/null +++ b/src/video_core/command_classes/codecs/vp9.h @@ -0,0 +1,216 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/stream.h" +#include "video_core/command_classes/codecs/vp9_types.h" +#include "video_core/command_classes/nvdec_common.h" + +namespace Tegra { +class GPU; +enum class FrameType { KeyFrame = 0, InterFrame = 1 }; +namespace Decoder { + +/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the +/// VP9 header bitstreams. + +class VpxRangeEncoder { +public: + VpxRangeEncoder(); + ~VpxRangeEncoder(); + + /// Writes the rightmost value_size bits from value into the stream + void Write(s32 value, s32 value_size); + + /// Writes a single bit with half probability + void Write(bool bit); + + /// Writes a bit to the base_stream encoded with probability + void Write(bool bit, s32 probability); + + /// Signal the end of the bitstream + void End(); + + std::vector& GetBuffer() { + return base_stream.GetBuffer(); + } + + const std::vector& GetBuffer() const { + return base_stream.GetBuffer(); + } + +private: + u8 PeekByte(); + Common::Stream base_stream{}; + u32 low_value{}; + u32 range{0xff}; + s32 count{-24}; + s32 half_probability{128}; + static constexpr std::array norm_lut{ + 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, + }; +}; + +class VpxBitStreamWriter { +public: + VpxBitStreamWriter(); + ~VpxBitStreamWriter(); + + /// Write an unsigned integer value + void WriteU(u32 value, u32 value_size); + + /// Write a signed integer value + void WriteS(s32 value, u32 value_size); + + /// Based on 6.2.10 of VP9 Spec, writes a delta coded value + void WriteDeltaQ(u32 value); + + /// Write a single bit. + void WriteBit(bool state); + + /// Pushes current buffer into buffer_array, resets buffer + void Flush(); + + /// Returns byte_array + std::vector& GetByteArray(); + + /// Returns const byte_array + const std::vector& GetByteArray() const; + +private: + /// Write bit_count bits from value into buffer + void WriteBits(u32 value, u32 bit_count); + + /// Gets next available position in buffer, invokes Flush() if buffer is full + s32 GetFreeBufferBits(); + + s32 buffer_size{8}; + + s32 buffer{}; + s32 buffer_pos{}; + std::vector byte_array; +}; + +class VP9 { +public: + explicit VP9(GPU& gpu); + ~VP9(); + + /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec + /// documentation + std::vector& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state); + + /// Returns true if the most recent frame was a hidden frame. + bool WasFrameHidden() const { + return hidden; + } + +private: + /// Generates compressed header probability updates in the bitstream writer + template + void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array& new_prob, + const std::array& old_prob); + + /// Generates compressed header probability updates in the bitstream writer + /// If probs are not equal, WriteProbabilityDelta is invoked + void WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); + + /// Generates compressed header probability deltas in the bitstream writer + void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); + + /// Adjusts old_prob depending on new_prob. 
Based on section 6.3.5 of VP9 Specification + s32 RemapProbability(s32 new_prob, s32 old_prob); + + /// Recenters probability. Based on section 6.3.6 of VP9 Specification + s32 RecenterNonNeg(s32 new_prob, s32 old_prob); + + /// Inverse of 6.3.4 Decode term subexp + void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value); + + /// Writes if the value is less than the test value + bool WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test); + + /// Writes probability updates for the Coef probabilities + void WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode, + const std::array& new_prob, + const std::array& old_prob); + + /// Write probabilities for 4-byte aligned structures + template + void WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array& new_prob, + const std::array& old_prob); + + /// Write motion vector probability updates. 6.3.17 in the spec + void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); + + /// 6.2.14 Tile size calculation + s32 CalcMinLog2TileCols(s32 frame_width); + s32 CalcMaxLog2TileCols(s32 frame_width); + + /// Returns VP9 information from NVDEC provided offset and size + Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state); + + /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct + void InsertEntropy(u64 offset, Vp9EntropyProbs& dst); + + /// Returns frame to be decoded after buffering + Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state); + + /// Use NVDEC providied information to compose the headers for the current frame + std::vector ComposeCompressedHeader(); + VpxBitStreamWriter ComposeUncompressedHeader(); + + GPU& gpu; + std::vector frame; + + std::array loop_filter_ref_deltas{}; + std::array loop_filter_mode_deltas{}; + + bool hidden; + s64 current_frame_number = -2; // since we buffer 2 frames + s32 grace_period = 6; // frame offsets need to stabilize + std::array frame_ctxs{}; + 
Vp9FrameContainer next_frame{}; + Vp9FrameContainer next_next_frame{}; + bool swap_next_golden{}; + + Vp9PictureInfo current_frame_info{}; + Vp9EntropyProbs prev_frame_probs{}; + + s32 diff_update_probability = 252; + s32 frame_sync_code = 0x498342; + static constexpr std::array map_lut = { + 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, + 67, 68, 69, 70, 71, 72, 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, + 83, 84, 85, 5, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 6, + 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 7, 110, 111, 112, 113, + 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124, 125, 126, 127, 128, 129, + 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, + 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, + 177, 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, + 193, 14, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, + 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, + 224, 225, 226, 227, 228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19, + }; +}; + +} // namespace Decoder +} // namespace Tegra diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h new file mode 100644 index 000000000..8688fdac0 --- /dev/null +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -0,0 +1,369 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include "common/cityhash.h" +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "video_core/command_classes/nvdec_common.h" + +namespace Tegra { +class GPU; + +namespace Decoder { +struct Vp9FrameDimensions { + s16 width{}; + s16 height{}; + s16 luma_pitch{}; + s16 chroma_pitch{}; +}; +static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); + +enum FrameFlags : u32 { + IsKeyFrame = 1 << 0, + LastFrameIsKeyFrame = 1 << 1, + FrameSizeChanged = 1 << 2, + ErrorResilientMode = 1 << 3, + LastShowFrame = 1 << 4, + IntraOnly = 1 << 5, +}; + +enum class MvJointType { + MvJointZero = 0, /* Zero vector */ + MvJointHnzvz = 1, /* Vert zero, hor nonzero */ + MvJointHzvnz = 2, /* Hor zero, vert nonzero */ + MvJointHnzvnz = 3, /* Both components nonzero */ +}; +enum class MvClassType { + MvClass0 = 0, /* (0, 2] integer pel */ + MvClass1 = 1, /* (2, 4] integer pel */ + MvClass2 = 2, /* (4, 8] integer pel */ + MvClass3 = 3, /* (8, 16] integer pel */ + MvClass4 = 4, /* (16, 32] integer pel */ + MvClass5 = 5, /* (32, 64] integer pel */ + MvClass6 = 6, /* (64, 128] integer pel */ + MvClass7 = 7, /* (128, 256] integer pel */ + MvClass8 = 8, /* (256, 512] integer pel */ + MvClass9 = 9, /* (512, 1024] integer pel */ + MvClass10 = 10, /* (1024,2048] integer pel */ +}; + +enum class BlockSize { + Block4x4 = 0, + Block4x8 = 1, + Block8x4 = 2, + Block8x8 = 3, + Block8x16 = 4, + Block16x8 = 5, + Block16x16 = 6, + Block16x32 = 7, + Block32x16 = 8, + Block32x32 = 9, + Block32x64 = 10, + Block64x32 = 11, + Block64x64 = 12, + BlockSizes = 13, + BlockInvalid = BlockSizes +}; + +enum class PredictionMode { + DcPred = 0, // Average of above and left pixels + VPred = 1, // Vertical + HPred = 2, // Horizontal + D45Pred = 3, // Directional 45 deg = round(arctan(1 / 1) * 180 / pi) + D135Pred = 4, // Directional 135 deg = 180 - 45 + D117Pred = 5, // Directional 117 deg = 180 - 63 + D153Pred = 
6, // Directional 153 deg = 180 - 27 + D207Pred = 7, // Directional 207 deg = 180 + 27 + D63Pred = 8, // Directional 63 deg = round(arctan(2 / 1) * 180 / pi) + TmPred = 9, // True-motion + NearestMv = 10, + NearMv = 11, + ZeroMv = 12, + NewMv = 13, + MbModeCount = 14 +}; + +enum class TxSize { + Tx4x4 = 0, // 4x4 transform + Tx8x8 = 1, // 8x8 transform + Tx16x16 = 2, // 16x16 transform + Tx32x32 = 3, // 32x32 transform + TxSizes = 4 +}; + +enum class TxMode { + Only4X4 = 0, // Only 4x4 transform used + Allow8X8 = 1, // Allow block transform size up to 8x8 + Allow16X16 = 2, // Allow block transform size up to 16x16 + Allow32X32 = 3, // Allow block transform size up to 32x32 + TxModeSelect = 4, // Transform specified for each block + TxModes = 5 +}; + +enum class reference_mode { + SingleReference = 0, + CompoundReference = 1, + ReferenceModeSelect = 2, + ReferenceModes = 3 +}; + +struct Segmentation { + u8 enabled{}; + u8 update_map{}; + u8 temporal_update{}; + u8 abs_delta{}; + std::array feature_mask{}; + std::array, 8> feature_data{}; +}; +static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); + +struct LoopFilter { + u8 mode_ref_delta_enabled{}; + std::array ref_deltas{}; + std::array mode_deltas{}; +}; +static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); + +struct Vp9EntropyProbs { + std::array y_mode_prob{}; + std::array partition_prob{}; + std::array coef_probs{}; + std::array switchable_interp_prob{}; + std::array inter_mode_prob{}; + std::array intra_inter_prob{}; + std::array comp_inter_prob{}; + std::array single_ref_prob{}; + std::array comp_ref_prob{}; + std::array tx_32x32_prob{}; + std::array tx_16x16_prob{}; + std::array tx_8x8_prob{}; + std::array skip_probs{}; + std::array joints{}; + std::array sign{}; + std::array classes{}; + std::array class_0{}; + std::array prob_bits{}; + std::array class_0_fr{}; + std::array fr{}; + std::array class_0_hp{}; + std::array high_precision{}; +}; 
+static_assert(sizeof(Vp9EntropyProbs) == 0x9F4, "Vp9EntropyProbs is an invalid size"); + +struct Vp9PictureInfo { + bool is_key_frame{}; + bool intra_only{}; + bool last_frame_was_key{}; + bool frame_size_changed{}; + bool error_resilient_mode{}; + bool last_frame_shown{}; + bool show_frame{}; + std::array ref_frame_sign_bias{}; + s32 base_q_index{}; + s32 y_dc_delta_q{}; + s32 uv_dc_delta_q{}; + s32 uv_ac_delta_q{}; + bool lossless{}; + s32 transform_mode{}; + bool allow_high_precision_mv{}; + s32 interp_filter{}; + s32 reference_mode{}; + s8 comp_fixed_ref{}; + std::array comp_var_ref{}; + s32 log2_tile_cols{}; + s32 log2_tile_rows{}; + bool segment_enabled{}; + bool segment_map_update{}; + bool segment_map_temporal_update{}; + s32 segment_abs_delta{}; + std::array segment_feature_enable{}; + std::array, 8> segment_feature_data{}; + bool mode_ref_delta_enabled{}; + bool use_prev_in_find_mv_refs{}; + std::array ref_deltas{}; + std::array mode_deltas{}; + Vp9EntropyProbs entropy{}; + Vp9FrameDimensions frame_size{}; + u8 first_level{}; + u8 sharpness_level{}; + u32 bitstream_size{}; + std::array frame_offsets{}; + std::array refresh_frame{}; +}; + +struct Vp9FrameContainer { + Vp9PictureInfo info{}; + std::vector bit_stream; +}; + +struct PictureInfo { + INSERT_PADDING_WORDS(12); + u32 bitstream_size{}; + INSERT_PADDING_WORDS(5); + Vp9FrameDimensions last_frame_size{}; + Vp9FrameDimensions golden_frame_size{}; + Vp9FrameDimensions alt_frame_size{}; + Vp9FrameDimensions current_frame_size{}; + u32 vp9_flags{}; + std::array ref_frame_sign_bias{}; + u8 first_level{}; + u8 sharpness_level{}; + u8 base_q_index{}; + u8 y_dc_delta_q{}; + u8 uv_ac_delta_q{}; + u8 uv_dc_delta_q{}; + u8 lossless{}; + u8 tx_mode{}; + u8 allow_high_precision_mv{}; + u8 interp_filter{}; + u8 reference_mode{}; + s8 comp_fixed_ref{}; + std::array comp_var_ref{}; + u8 log2_tile_cols{}; + u8 log2_tile_rows{}; + Segmentation segmentation{}; + LoopFilter loop_filter{}; + INSERT_PADDING_BYTES(5); + 
u32 surface_params{}; + INSERT_PADDING_WORDS(3); + + Vp9PictureInfo Convert() const { + + return Vp9PictureInfo{ + .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0, + .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0, + .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0, + .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, + .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, + .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, + .ref_frame_sign_bias = ref_frame_sign_bias, + .base_q_index = base_q_index, + .y_dc_delta_q = y_dc_delta_q, + .uv_dc_delta_q = uv_dc_delta_q, + .uv_ac_delta_q = uv_ac_delta_q, + .lossless = lossless != 0, + .transform_mode = tx_mode, + .allow_high_precision_mv = allow_high_precision_mv != 0, + .interp_filter = interp_filter, + .reference_mode = reference_mode, + .comp_fixed_ref = comp_fixed_ref, + .comp_var_ref = comp_var_ref, + .log2_tile_cols = log2_tile_cols, + .log2_tile_rows = log2_tile_rows, + .segment_enabled = segmentation.enabled != 0, + .segment_map_update = segmentation.update_map != 0, + .segment_map_temporal_update = segmentation.temporal_update != 0, + .segment_abs_delta = segmentation.abs_delta, + .segment_feature_enable = segmentation.feature_mask, + .segment_feature_data = segmentation.feature_data, + .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0, + .use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) && + !(vp9_flags == (FrameFlags::FrameSizeChanged)) && + !(vp9_flags == (FrameFlags::IntraOnly)) && + (vp9_flags == (FrameFlags::LastShowFrame)) && + !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), + .ref_deltas = loop_filter.ref_deltas, + .mode_deltas = loop_filter.mode_deltas, + .frame_size = current_frame_size, + .first_level = first_level, + .sharpness_level = sharpness_level, + .bitstream_size = bitstream_size, + }; + } +}; +static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo 
is an invalid size"); + +struct EntropyProbs { + INSERT_PADDING_BYTES(1024); + std::array, 7> inter_mode_prob{}; + std::array intra_inter_prob{}; + INSERT_PADDING_BYTES(80); + std::array, 2> tx_8x8_prob{}; + std::array, 2> tx_16x16_prob{}; + std::array, 2> tx_32x32_prob{}; + std::array y_mode_prob_e8{}; + std::array, 4> y_mode_prob_e0e7{}; + INSERT_PADDING_BYTES(64); + std::array, 16> partition_prob{}; + INSERT_PADDING_BYTES(10); + std::array, 4> switchable_interp_prob{}; + std::array comp_inter_prob{}; + std::array skip_probs{}; + std::array joints{}; + std::array sign{}; + std::array, 2> class_0{}; + std::array, 2> fr{}; + std::array class_0_hp{}; + std::array high_precision{}; + std::array, 2> classes{}; + std::array, 2>, 2> class_0_fr{}; + std::array, 2> pred_bits{}; + std::array, 5> single_ref_prob{}; + std::array comp_ref_prob{}; + INSERT_PADDING_BYTES(17); + std::array, 6>, 6>, 2>, 2>, 4> + coef_probs{}; + + void Convert(Vp9EntropyProbs& fc) { + std::memcpy(fc.inter_mode_prob.data(), inter_mode_prob.data(), fc.inter_mode_prob.size()); + + std::memcpy(fc.intra_inter_prob.data(), intra_inter_prob.data(), + fc.intra_inter_prob.size()); + + std::memcpy(fc.tx_8x8_prob.data(), tx_8x8_prob.data(), fc.tx_8x8_prob.size()); + std::memcpy(fc.tx_16x16_prob.data(), tx_16x16_prob.data(), fc.tx_16x16_prob.size()); + std::memcpy(fc.tx_32x32_prob.data(), tx_32x32_prob.data(), fc.tx_32x32_prob.size()); + + for (s32 i = 0; i < 4; i++) { + for (s32 j = 0; j < 9; j++) { + fc.y_mode_prob[j + 9 * i] = j < 8 ? 
y_mode_prob_e0e7[i][j] : y_mode_prob_e8[i]; + } + } + + std::memcpy(fc.partition_prob.data(), partition_prob.data(), fc.partition_prob.size()); + + std::memcpy(fc.switchable_interp_prob.data(), switchable_interp_prob.data(), + fc.switchable_interp_prob.size()); + std::memcpy(fc.comp_inter_prob.data(), comp_inter_prob.data(), fc.comp_inter_prob.size()); + std::memcpy(fc.skip_probs.data(), skip_probs.data(), fc.skip_probs.size()); + + std::memcpy(fc.joints.data(), joints.data(), fc.joints.size()); + + std::memcpy(fc.sign.data(), sign.data(), fc.sign.size()); + std::memcpy(fc.class_0.data(), class_0.data(), fc.class_0.size()); + std::memcpy(fc.fr.data(), fr.data(), fc.fr.size()); + std::memcpy(fc.class_0_hp.data(), class_0_hp.data(), fc.class_0_hp.size()); + std::memcpy(fc.high_precision.data(), high_precision.data(), fc.high_precision.size()); + std::memcpy(fc.classes.data(), classes.data(), fc.classes.size()); + std::memcpy(fc.class_0_fr.data(), class_0_fr.data(), fc.class_0_fr.size()); + std::memcpy(fc.prob_bits.data(), pred_bits.data(), fc.prob_bits.size()); + std::memcpy(fc.single_ref_prob.data(), single_ref_prob.data(), fc.single_ref_prob.size()); + std::memcpy(fc.comp_ref_prob.data(), comp_ref_prob.data(), fc.comp_ref_prob.size()); + + std::memcpy(fc.coef_probs.data(), coef_probs.data(), fc.coef_probs.size()); + } +}; +static_assert(sizeof(EntropyProbs) == 0xEA0, "EntropyProbs is an invalid size"); + +enum class Ref { Last, Golden, AltRef }; + +struct RefPoolElement { + s64 frame{}; + Ref ref{}; + bool refresh{}; +}; + +struct FrameContexts { + s64 from{}; + bool adapted{}; + Vp9EntropyProbs probs{}; +}; + +}; // namespace Decoder +}; // namespace Tegra diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp new file mode 100644 index 000000000..a5234ee47 --- /dev/null +++ b/src/video_core/command_classes/host1x.cpp @@ -0,0 +1,39 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version 
+// Refer to the license.txt file included. + +#include "common/assert.h" +#include "video_core/command_classes/host1x.h" +#include "video_core/gpu.h" + +Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {} + +Tegra::Host1x::~Host1x() = default; + +void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) { + u8* const state_offset = reinterpret_cast(&state) + offset * sizeof(u32); + std::memcpy(state_offset, &arguments, sizeof(u32)); +} + +void Tegra::Host1x::ProcessMethod(Host1x::Method method, const std::vector& arguments) { + StateWrite(static_cast(method), arguments[0]); + switch (method) { + case Method::WaitSyncpt: + Execute(arguments[0]); + break; + case Method::LoadSyncptPayload32: + syncpoint_value = arguments[0]; + break; + case Method::WaitSyncpt32: + Execute(arguments[0]); + break; + default: + UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast(method)); + break; + } +} + +void Tegra::Host1x::Execute(u32 data) { + // This method waits on a valid syncpoint. + // TODO: Implement when proper Async is in place +} diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h new file mode 100644 index 000000000..501a5ed2e --- /dev/null +++ b/src/video_core/command_classes/host1x.h @@ -0,0 +1,78 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Tegra { +class GPU; +class Nvdec; + +class Host1x { +public: + struct Host1xClassRegisters { + u32 incr_syncpt{}; + u32 incr_syncpt_ctrl{}; + u32 incr_syncpt_error{}; + INSERT_PADDING_WORDS(5); + u32 wait_syncpt{}; + u32 wait_syncpt_base{}; + u32 wait_syncpt_incr{}; + u32 load_syncpt_base{}; + u32 incr_syncpt_base{}; + u32 clear{}; + u32 wait{}; + u32 wait_with_interrupt{}; + u32 delay_use{}; + u32 tick_count_high{}; + u32 tick_count_low{}; + u32 tick_ctrl{}; + INSERT_PADDING_WORDS(23); + u32 ind_ctrl{}; + u32 ind_off2{}; + u32 ind_off{}; + std::array ind_data{}; + INSERT_PADDING_WORDS(1); + u32 load_syncpoint_payload32{}; + u32 stall_ctrl{}; + u32 wait_syncpt32{}; + u32 wait_syncpt_base32{}; + u32 load_syncpt_base32{}; + u32 incr_syncpt_base32{}; + u32 stall_count_high{}; + u32 stall_count_low{}; + u32 xref_ctrl{}; + u32 channel_xref_high{}; + u32 channel_xref_low{}; + }; + static_assert(sizeof(Host1xClassRegisters) == 0x164, "Host1xClassRegisters is an invalid size"); + + enum class Method : u32 { + WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4, + LoadSyncptPayload32 = offsetof(Host1xClassRegisters, load_syncpoint_payload32) / 4, + WaitSyncpt32 = offsetof(Host1xClassRegisters, wait_syncpt32) / 4, + }; + + explicit Host1x(GPU& gpu); + ~Host1x(); + + /// Writes the method into the state, Invoke Execute() if encountered + void ProcessMethod(Host1x::Method method, const std::vector& arguments); + +private: + /// For Host1x, execute is waiting on a syncpoint previously written into the state + void Execute(u32 data); + + /// Write argument into the provided offset + void StateWrite(u32 offset, u32 arguments); + + u32 syncpoint_value{}; + Host1xClassRegisters state{}; + GPU& gpu; +}; + +} // namespace Tegra diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp new file mode 100644 index 
000000000..ede9466eb --- /dev/null +++ b/src/video_core/command_classes/nvdec.cpp @@ -0,0 +1,56 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include "common/assert.h" +#include "common/bit_util.h" +#include "core/memory.h" +#include "video_core/command_classes/nvdec.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" + +namespace Tegra { + +Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique(gpu)) {} + +Nvdec::~Nvdec() = default; + +void Nvdec::ProcessMethod(Nvdec::Method method, const std::vector& arguments) { + if (method == Method::SetVideoCodec) { + codec->StateWrite(static_cast(method), arguments[0]); + } else { + codec->StateWrite(static_cast(method), static_cast(arguments[0]) << 8); + } + + switch (method) { + case Method::SetVideoCodec: + codec->SetTargetCodec(static_cast(arguments[0])); + break; + case Method::Execute: + Execute(); + break; + } +} + +AVFrame* Nvdec::GetFrame() { + return codec->GetCurrentFrame(); +} + +const AVFrame* Nvdec::GetFrame() const { + return codec->GetCurrentFrame(); +} + +void Nvdec::Execute() { + switch (codec->GetCurrentCodec()) { + case NvdecCommon::VideoCodec::H264: + case NvdecCommon::VideoCodec::Vp9: + codec->Decode(); + break; + default: + UNIMPLEMENTED_MSG("Unknown codec {}", static_cast(codec->GetCurrentCodec())); + break; + } +} + +} // namespace Tegra diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h new file mode 100644 index 000000000..c1a9d843e --- /dev/null +++ b/src/video_core/command_classes/nvdec.h @@ -0,0 +1,39 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "video_core/command_classes/codecs/codec.h" + +namespace Tegra { +class GPU; + +class Nvdec { +public: + enum class Method : u32 { + SetVideoCodec = 0x80, + Execute = 0xc0, + }; + + explicit Nvdec(GPU& gpu); + ~Nvdec(); + + /// Writes the method into the state, Invoke Execute() if encountered + void ProcessMethod(Nvdec::Method method, const std::vector& arguments); + + /// Return most recently decoded frame + AVFrame* GetFrame(); + const AVFrame* GetFrame() const; + +private: + /// Invoke codec to decode a frame + void Execute(); + + GPU& gpu; + std::unique_ptr codec; +}; +} // namespace Tegra diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h new file mode 100644 index 000000000..01b5e086d --- /dev/null +++ b/src/video_core/command_classes/nvdec_common.h @@ -0,0 +1,48 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Tegra::NvdecCommon { + +struct NvdecRegisters { + INSERT_PADDING_WORDS(256); + u64 set_codec_id{}; + INSERT_PADDING_WORDS(254); + u64 set_platform_id{}; + u64 picture_info_offset{}; + u64 frame_bitstream_offset{}; + u64 frame_number{}; + u64 h264_slice_data_offsets{}; + u64 h264_mv_dump_offset{}; + INSERT_PADDING_WORDS(6); + u64 frame_stats_offset{}; + u64 h264_last_surface_luma_offset{}; + u64 h264_last_surface_chroma_offset{}; + std::array surface_luma_offset{}; + std::array surface_chroma_offset{}; + INSERT_PADDING_WORDS(132); + u64 vp9_entropy_probs_offset{}; + u64 vp9_backward_updates_offset{}; + u64 vp9_last_frame_segmap_offset{}; + u64 vp9_curr_frame_segmap_offset{}; + INSERT_PADDING_WORDS(2); + u64 vp9_last_frame_mvs_offset{}; + u64 vp9_curr_frame_mvs_offset{}; + INSERT_PADDING_WORDS(2); +}; +static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); + +enum class VideoCodec : u32 { + None = 0x0, + H264 = 0x3, + Vp8 = 0x5, + H265 = 0x7, + Vp9 = 0x9, +}; + +} // namespace Tegra::NvdecCommon diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp new file mode 100644 index 000000000..a0ab44855 --- /dev/null +++ b/src/video_core/command_classes/sync_manager.cpp @@ -0,0 +1,60 @@ +// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all 
copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// + +#include +#include "sync_manager.h" +#include "video_core/gpu.h" + +namespace Tegra { +SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {} +SyncptIncrManager::~SyncptIncrManager() = default; + +void SyncptIncrManager::Increment(u32 id) { + increments.push_back(SyncptIncr{0, id, true}); + IncrementAllDone(); +} + +u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) { + const u32 handle = current_id++; + increments.push_back(SyncptIncr{handle, class_id, id}); + return handle; +} + +void SyncptIncrManager::SignalDone(u32 handle) { + auto done_incr = std::find_if(increments.begin(), increments.end(), + [handle](SyncptIncr incr) { return incr.id == handle; }); + if (done_incr != increments.end()) { + const SyncptIncr incr = *done_incr; + *done_incr = SyncptIncr{incr.id, incr.class_id, incr.syncpt_id, true}; + } + IncrementAllDone(); +} + +void SyncptIncrManager::IncrementAllDone() { + std::size_t done_count = 0; + for (; done_count < increments.size(); ++done_count) { + if (!increments[done_count].complete) { + break; + } + gpu.IncrementSyncPoint(increments[done_count].syncpt_id); + } + increments.erase(increments.begin(), increments.begin() + done_count); +} +} // namespace Tegra diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/command_classes/sync_manager.h new file mode 100644 index 000000000..353b67573 --- /dev/null +++ b/src/video_core/command_classes/sync_manager.h @@ -0,0 +1,64 @@ 
+// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// + +#pragma once + +#include +#include +#include "common/common_types.h" + +namespace Tegra { +class GPU; +struct SyncptIncr { + u32 id; + u32 class_id; + u32 syncpt_id; + bool complete; + + SyncptIncr(u32 id, u32 syncpt_id_, u32 class_id_, bool done = false) + : id(id), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {} +}; + +class SyncptIncrManager { +public: + explicit SyncptIncrManager(GPU& gpu); + ~SyncptIncrManager(); + + /// Add syncpoint id and increment all + void Increment(u32 id); + + /// Returns a handle to increment later + u32 IncrementWhenDone(u32 class_id, u32 id); + + /// IncrementAllDone, including handle + void SignalDone(u32 handle); + + /// Increment all sequential pending increments that are already done. 
+ void IncrementAllDone(); + +private: + std::vector increments; + std::mutex increment_lock; + u32 current_id{}; + + GPU& gpu; +}; + +} // namespace Tegra diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp new file mode 100644 index 000000000..66e15a1a8 --- /dev/null +++ b/src/video_core/command_classes/vic.cpp @@ -0,0 +1,180 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include "common/assert.h" +#include "video_core/command_classes/nvdec.h" +#include "video_core/command_classes/vic.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/texture_cache/surface_params.h" + +extern "C" { +#include +} + +namespace Tegra { + +Vic::Vic(GPU& gpu_, std::shared_ptr nvdec_processor_) + : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {} +Vic::~Vic() = default; + +void Vic::VicStateWrite(u32 offset, u32 arguments) { + u8* const state_offset = reinterpret_cast(&vic_state) + offset * sizeof(u32); + std::memcpy(state_offset, &arguments, sizeof(u32)); +} + +void Vic::ProcessMethod(Vic::Method method, const std::vector& arguments) { + LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast(method)); + VicStateWrite(static_cast(method), arguments[0]); + const u64 arg = static_cast(arguments[0]) << 8; + switch (method) { + case Method::Execute: + Execute(); + break; + case Method::SetConfigStructOffset: + config_struct_address = arg; + break; + case Method::SetOutputSurfaceLumaOffset: + output_surface_luma_address = arg; + break; + case Method::SetOutputSurfaceChromaUOffset: + output_surface_chroma_u_address = arg; + break; + case Method::SetOutputSurfaceChromaVOffset: + output_surface_chroma_v_address = arg; + break; + default: + break; + } +} + +void Vic::Execute() { + if (output_surface_luma_address == 0) { + LOG_ERROR(Service_NVDRV, "VIC Luma address 
not set. Recieved 0x{:X}", + vic_state.output_surface.luma_offset); + return; + } + const VicConfig config{gpu.MemoryManager().Read(config_struct_address + 0x20)}; + const VideoPixelFormat pixel_format = + static_cast(config.pixel_format.Value()); + switch (pixel_format) { + case VideoPixelFormat::BGRA8: + case VideoPixelFormat::RGBA8: { + LOG_TRACE(Service_NVDRV, "Writing RGB Frame"); + const auto* frame = nvdec_processor->GetFrame(); + + if (!frame || frame->width == 0 || frame->height == 0) { + return; + } + if (scaler_ctx == nullptr || frame->width != scaler_width || + frame->height != scaler_height) { + const AVPixelFormat target_format = + (pixel_format == VideoPixelFormat::RGBA8) ? AV_PIX_FMT_RGBA : AV_PIX_FMT_BGRA; + + sws_freeContext(scaler_ctx); + scaler_ctx = nullptr; + + // FFmpeg returns all frames in YUV420, convert it into expected format + scaler_ctx = + sws_getContext(frame->width, frame->height, AV_PIX_FMT_YUV420P, frame->width, + frame->height, target_format, 0, nullptr, nullptr, nullptr); + + scaler_width = frame->width; + scaler_height = frame->height; + } + // Get Converted frame + const std::size_t linear_size = frame->width * frame->height * 4; + + using AVMallocPtr = std::unique_ptr; + AVMallocPtr converted_frame_buffer{static_cast(av_malloc(linear_size)), av_free}; + + const int converted_stride{frame->width * 4}; + u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; + + sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, + &converted_frame_buf_addr, &converted_stride); + + const u32 blk_kind = static_cast(config.block_linear_kind); + if (blk_kind != 0) { + // swizzle pitch linear to block linear + const u32 block_height = static_cast(config.block_linear_height_log2); + const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, + block_height, 0); + std::vector swizzled_data(size); + Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4, + swizzled_data.data(), 
converted_frame_buffer.get(), + false, block_height, 0, 1); + + gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); + gpu.Maxwell3D().OnMemoryWrite(); + } else { + // send pitch linear frame + gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, + linear_size); + gpu.Maxwell3D().OnMemoryWrite(); + } + break; + } + case VideoPixelFormat::Yuv420: { + LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame"); + + const auto* frame = nvdec_processor->GetFrame(); + + if (!frame || frame->width == 0 || frame->height == 0) { + return; + } + + const std::size_t surface_width = config.surface_width_minus1 + 1; + const std::size_t surface_height = config.surface_height_minus1 + 1; + const std::size_t half_width = surface_width / 2; + const std::size_t half_height = config.surface_height_minus1 / 2; + const std::size_t aligned_width = (surface_width + 0xff) & ~0xff; + + const auto* luma_ptr = frame->data[0]; + const auto* chroma_b_ptr = frame->data[1]; + const auto* chroma_r_ptr = frame->data[2]; + const auto stride = frame->linesize[0]; + const auto half_stride = frame->linesize[1]; + + std::vector luma_buffer(aligned_width * surface_height); + std::vector chroma_buffer(aligned_width * half_height); + + // Populate luma buffer + for (std::size_t y = 0; y < surface_height - 1; ++y) { + std::size_t src = y * stride; + std::size_t dst = y * aligned_width; + + std::size_t size = surface_width; + + for (std::size_t offset = 0; offset < size; ++offset) { + luma_buffer[dst + offset] = luma_ptr[src + offset]; + } + } + gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), + luma_buffer.size()); + + // Populate chroma buffer from both channels with interleaving. 
+ for (std::size_t y = 0; y < half_height; ++y) { + std::size_t src = y * half_stride; + std::size_t dst = y * aligned_width; + + for (std::size_t x = 0; x < half_width; ++x) { + chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x]; + chroma_buffer[dst + x * 2 + 1] = chroma_r_ptr[src + x]; + } + } + gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(), + chroma_buffer.size()); + gpu.Maxwell3D().OnMemoryWrite(); + break; + } + default: + UNIMPLEMENTED_MSG("Unknown video pixel format {}", config.pixel_format.Value()); + break; + } +} + +} // namespace Tegra diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h new file mode 100644 index 000000000..dd0a2aed8 --- /dev/null +++ b/src/video_core/command_classes/vic.h @@ -0,0 +1,110 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include "common/bit_field.h" +#include "common/common_types.h" + +struct SwsContext; + +namespace Tegra { +class GPU; +class Nvdec; + +struct PlaneOffsets { + u32 luma_offset{}; + u32 chroma_u_offset{}; + u32 chroma_v_offset{}; +}; + +struct VicRegisters { + INSERT_PADDING_WORDS(64); + u32 nop{}; + INSERT_PADDING_WORDS(15); + u32 pm_trigger{}; + INSERT_PADDING_WORDS(47); + u32 set_application_id{}; + u32 set_watchdog_timer{}; + INSERT_PADDING_WORDS(17); + u32 context_save_area{}; + u32 context_switch{}; + INSERT_PADDING_WORDS(43); + u32 execute{}; + INSERT_PADDING_WORDS(63); + std::array, 8> surfacex_slots{}; + u32 picture_index{}; + u32 control_params{}; + u32 config_struct_offset{}; + u32 filter_struct_offset{}; + u32 palette_offset{}; + u32 hist_offset{}; + u32 context_id{}; + u32 fce_ucode_size{}; + PlaneOffsets output_surface{}; + u32 fce_ucode_offset{}; + INSERT_PADDING_WORDS(4); + std::array slot_context_id{}; + INSERT_PADDING_WORDS(16); +}; +static_assert(sizeof(VicRegisters) == 0x7A0, "VicRegisters 
is an invalid size"); + +class Vic { +public: + enum class Method : u32 { + Execute = 0xc0, + SetControlParams = 0x1c1, + SetConfigStructOffset = 0x1c2, + SetOutputSurfaceLumaOffset = 0x1c8, + SetOutputSurfaceChromaUOffset = 0x1c9, + SetOutputSurfaceChromaVOffset = 0x1ca + }; + + explicit Vic(GPU& gpu, std::shared_ptr nvdec_processor); + ~Vic(); + + /// Write to the device state. + void ProcessMethod(Vic::Method method, const std::vector& arguments); + +private: + void Execute(); + + void VicStateWrite(u32 offset, u32 arguments); + VicRegisters vic_state{}; + + enum class VideoPixelFormat : u64_le { + RGBA8 = 0x1f, + BGRA8 = 0x20, + Yuv420 = 0x44, + }; + + union VicConfig { + u64_le raw{}; + BitField<0, 7, u64_le> pixel_format; + BitField<7, 2, u64_le> chroma_loc_horiz; + BitField<9, 2, u64_le> chroma_loc_vert; + BitField<11, 4, u64_le> block_linear_kind; + BitField<15, 4, u64_le> block_linear_height_log2; + BitField<19, 3, u64_le> reserved0; + BitField<22, 10, u64_le> reserved1; + BitField<32, 14, u64_le> surface_width_minus1; + BitField<46, 14, u64_le> surface_height_minus1; + }; + + GPU& gpu; + std::shared_ptr nvdec_processor; + + GPUVAddr config_struct_address{}; + GPUVAddr output_surface_luma_address{}; + GPUVAddr output_surface_chroma_u_address{}; + GPUVAddr output_surface_chroma_v_address{}; + + SwsContext* scaler_ctx{}; + s32 scaler_width{}; + s32 scaler_height{}; +}; + +} // namespace Tegra diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 4bb9256e9..171f78183 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -27,9 +27,10 @@ namespace Tegra { MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -GPU::GPU(Core::System& system_, bool is_async_) +GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) : system{system_}, memory_manager{std::make_unique(system)}, dma_pusher{std::make_unique(system, *this)}, + cdma_pusher{std::make_unique(*this)}, use_nvdec{use_nvdec_}, 
maxwell_3d{std::make_unique(system, *memory_manager)}, fermi_2d{std::make_unique()}, kepler_compute{std::make_unique(system, *memory_manager)}, @@ -77,10 +78,18 @@ DmaPusher& GPU::DmaPusher() { return *dma_pusher; } +Tegra::CDmaPusher& GPU::CDmaPusher() { + return *cdma_pusher; +} + const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +const Tegra::CDmaPusher& GPU::CDmaPusher() const { + return *cdma_pusher; +} + void GPU::WaitFence(u32 syncpoint_id, u32 value) { // Synced GPU, is always in sync if (!is_async) { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 2d15d1c6f..b8c613b11 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "core/hle/service/nvdrv/nvdata.h" #include "core/hle/service/nvflinger/buffer_queue.h" +#include "video_core/cdma_pusher.h" #include "video_core/dma_pusher.h" using CacheAddr = std::uintptr_t; @@ -157,7 +158,7 @@ public: method_count(method_count) {} }; - explicit GPU(Core::System& system, bool is_async); + explicit GPU(Core::System& system, bool is_async, bool use_nvdec); virtual ~GPU(); /// Binds a renderer to the GPU. @@ -209,6 +210,15 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + /// Returns a const reference to the GPU DMA pusher. + const Tegra::DmaPusher& DmaPusher() const; + + /// Returns a reference to the GPU CDMA pusher. + Tegra::CDmaPusher& CDmaPusher(); + + /// Returns a const reference to the GPU CDMA pusher. + const Tegra::CDmaPusher& CDmaPusher() const; + VideoCore::RendererBase& Renderer() { return *renderer; } @@ -249,8 +259,9 @@ public: return is_async; } - /// Returns a const reference to the GPU DMA pusher. 
- const Tegra::DmaPusher& DmaPusher() const; + bool UseNvdec() const { + return use_nvdec; + } struct Regs { static constexpr size_t NUM_REGS = 0x40; @@ -311,6 +322,9 @@ public: /// Push GPU command entries to be processed virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; + /// Push GPU command buffer entries to be processed + virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0; + /// Swap buffers (render frame) virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; @@ -349,7 +363,9 @@ protected: Core::System& system; std::unique_ptr memory_manager; std::unique_ptr dma_pusher; + std::unique_ptr cdma_pusher; std::unique_ptr renderer; + const bool use_nvdec; private: /// Mapping of command subchannels to their bound engine ids @@ -372,6 +388,7 @@ private: std::array, Service::Nvidia::MaxSyncPoints> syncpt_interrupts; std::mutex sync_mutex; + std::mutex device_mutex; std::condition_variable sync_cv; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 70a3d5738..a9baaf7ef 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -10,12 +10,13 @@ namespace VideoCommon { -GPUAsynch::GPUAsynch(Core::System& system) : GPU{system, true}, gpu_thread{system} {} +GPUAsynch::GPUAsynch(Core::System& system, bool use_nvdec) + : GPU{system, true, use_nvdec}, gpu_thread{system} {} GPUAsynch::~GPUAsynch() = default; void GPUAsynch::Start() { - gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); + gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher); cpu_context = renderer->GetRenderWindow().CreateSharedContext(); cpu_context->MakeCurrent(); } @@ -32,6 +33,27 @@ void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { gpu_thread.SubmitList(std::move(entries)); } +void GPUAsynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { + if (!use_nvdec) { + return; + } + // This condition fires when a video stream ends, clear all 
intermediary data + if (entries[0].raw == 0xDEADB33F) { + cdma_pusher.reset(); + return; + } + if (!cdma_pusher) { + cdma_pusher = std::make_unique(*this); + } + + // SubmitCommandBuffer would make the nvdec operations async, this is not currently working + // TODO(ameerj): RE proper async nvdec operation + // gpu_thread.SubmitCommandBuffer(std::move(entries)); + + cdma_pusher->Push(std::move(entries)); + cdma_pusher->DispatchCalls(); +} + void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { gpu_thread.SwapBuffers(framebuffer); } diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index f89c855a5..0c0872e73 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -20,13 +20,14 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU asynchronously class GPUAsynch final : public Tegra::GPU { public: - explicit GPUAsynch(Core::System& system); + explicit GPUAsynch(Core::System& system, bool use_nvdec); ~GPUAsynch() override; void Start() override; void ObtainContext() override; void ReleaseContext() override; void PushGPUEntries(Tegra::CommandList&& entries) override; + void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void FlushRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override; diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index 1ca47ddef..ecf7bbdf3 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -7,7 +7,7 @@ namespace VideoCommon { -GPUSynch::GPUSynch(Core::System& system) : GPU{system, false} {} +GPUSynch::GPUSynch(Core::System& system, bool use_nvdec) : GPU{system, false, use_nvdec} {} GPUSynch::~GPUSynch() = default; @@ -26,6 +26,22 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { dma_pusher->DispatchCalls(); } +void GPUSynch::PushCommandBuffer(Tegra::ChCommandHeaderList& 
entries) { + if (!use_nvdec) { + return; + } + // This condition fires when a video stream ends, clears all intermediary data + if (entries[0].raw == 0xDEADB33F) { + cdma_pusher.reset(); + return; + } + if (!cdma_pusher) { + cdma_pusher = std::make_unique(*this); + } + cdma_pusher->Push(std::move(entries)); + cdma_pusher->DispatchCalls(); +} + void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { renderer->SwapBuffers(framebuffer); } diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 297258cb1..9d778c71a 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -19,13 +19,14 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU synchronously class GPUSynch final : public Tegra::GPU { public: - explicit GPUSynch(Core::System& system); + explicit GPUSynch(Core::System& system, bool use_nvdec); ~GPUSynch() override; void Start() override; void ObtainContext() override; void ReleaseContext() override; void PushGPUEntries(Tegra::CommandList&& entries) override; + void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void FlushRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index bf761abf2..4b8f58283 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -18,7 +18,7 @@ namespace VideoCommon::GPUThread { /// Runs the GPU thread static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, - SynchState& state) { + SynchState& state, Tegra::CDmaPusher& cdma_pusher) { std::string name = "yuzu:GPU"; MicroProfileOnThreadCreate(name.c_str()); Common::SetCurrentThreadName(name.c_str()); @@ -42,6 +42,10 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& 
renderer, if (const auto submit_list = std::get_if(&next.data)) { dma_pusher.Push(std::move(submit_list->entries)); dma_pusher.DispatchCalls(); + } else if (const auto command_list = std::get_if(&next.data)) { + // NVDEC + cdma_pusher.Push(std::move(command_list->entries)); + cdma_pusher.DispatchCalls(); } else if (const auto data = std::get_if(&next.data)) { renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); } else if (std::holds_alternative(next.data)) { @@ -75,15 +79,19 @@ ThreadManager::~ThreadManager() { void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, - Tegra::DmaPusher& dma_pusher) { - thread = std::thread{RunThread, std::ref(system), std::ref(renderer), - std::ref(context), std::ref(dma_pusher), std::ref(state)}; + Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) { + thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), + std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher)); } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { PushCommand(SubmitListCommand(std::move(entries))); } +void ThreadManager::SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries) { + PushCommand(SubmitChCommandEntries(std::move(entries))); +} + void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { PushCommand(SwapBuffersCommand(framebuffer ? 
std::make_optional(*framebuffer) : std::nullopt)); } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 5a28335d6..32a34e3a7 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -37,6 +37,14 @@ struct SubmitListCommand final { Tegra::CommandList entries; }; +/// Command to signal to the GPU thread that a cdma command list is ready for processing +struct SubmitChCommandEntries final { + explicit SubmitChCommandEntries(Tegra::ChCommandHeaderList&& entries) + : entries{std::move(entries)} {} + + Tegra::ChCommandHeaderList entries; +}; + /// Command to signal to the GPU thread that a swap buffers is pending struct SwapBuffersCommand final { explicit SwapBuffersCommand(std::optional framebuffer) @@ -77,9 +85,9 @@ struct OnCommandListEndCommand final {}; struct GPUTickCommand final {}; using CommandData = - std::variant; + std::variant; struct CommandDataContainer { CommandDataContainer() = default; @@ -109,11 +117,14 @@ public: /// Creates and starts the GPU thread. 
void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, - Tegra::DmaPusher& dma_pusher); + Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher); /// Push GPU command entries to be processed void SubmitList(Tegra::CommandList&& entries); + /// Push GPU CDMA command buffer entries to be processed + void SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries); + /// Swap buffers (render frame) void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 02cf53d15..6e70bd362 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -11,6 +11,7 @@ #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_base.h" namespace Tegra { @@ -44,6 +45,12 @@ GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_ return Map(cpu_addr, *FindFreeRange(size, align), size); } +GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) { + const std::optional gpu_addr = FindFreeRange(size, 1, true); + ASSERT(gpu_addr); + return Map(cpu_addr, *gpu_addr, size); +} + void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { if (!size) { return; @@ -108,7 +115,8 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s page_table[PageEntryIndex(gpu_addr)] = page_entry; } -std::optional MemoryManager::FindFreeRange(std::size_t size, std::size_t align) const { +std::optional MemoryManager::FindFreeRange(std::size_t size, std::size_t align, + bool start_32bit_address) const { if (!align) { align = page_size; } else { @@ -116,7 +124,7 @@ std::optional MemoryManager::FindFreeRange(std::size_t size, std::size } u64 available_size{}; - GPUVAddr gpu_addr{address_space_start}; + GPUVAddr gpu_addr{start_32bit_address ? 
address_space_start_low : address_space_start}; while (gpu_addr + available_size < address_space_size) { if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) { available_size += page_size; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 53c8d122a..c078193d9 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -116,6 +116,7 @@ public: [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); + [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); [[nodiscard]] std::optional AllocateFixed(GPUVAddr gpu_addr, std::size_t size); [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align); void Unmap(GPUVAddr gpu_addr, std::size_t size); @@ -124,7 +125,8 @@ private: [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const; void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size); GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size); - [[nodiscard]] std::optional FindFreeRange(std::size_t size, std::size_t align) const; + [[nodiscard]] std::optional FindFreeRange(std::size_t size, std::size_t align, + bool start_32bit_address = false) const; void TryLockPage(PageEntry page_entry, std::size_t size); void TryUnlockPage(PageEntry page_entry, std::size_t size); @@ -135,6 +137,7 @@ private: static constexpr u64 address_space_size = 1ULL << 40; static constexpr u64 address_space_start = 1ULL << 32; + static constexpr u64 address_space_start_low = 1ULL << 16; static constexpr u64 page_bits{16}; static constexpr u64 page_size{1 << page_bits}; static constexpr u64 page_mask{page_size - 1}; diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index a14df06a3..dd5cee4a1 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -44,10 +44,11 @@ namespace VideoCore { 
std::unique_ptr CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { std::unique_ptr gpu; + const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { - gpu = std::make_unique(system); + gpu = std::make_unique(system, use_nvdec); } else { - gpu = std::make_unique(system); + gpu = std::make_unique(system, use_nvdec); } auto context = emu_window.CreateSharedContext(); -- cgit v1.2.3 From c04203b786d87ecb66811341e7ea776452664e91 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 27 Oct 2020 02:09:17 -0400 Subject: nvdec: Tidy up header includes Prevents a few unnecessary inclusions. --- src/video_core/command_classes/codecs/codec.cpp | 1 + src/video_core/command_classes/codecs/codec.h | 2 -- src/video_core/command_classes/codecs/h264.cpp | 15 +++++++++++ src/video_core/command_classes/codecs/h264.h | 12 --------- src/video_core/command_classes/codecs/vp9.cpp | 31 +++++++++++++++++++++- src/video_core/command_classes/codecs/vp9.h | 32 ++--------------------- src/video_core/command_classes/codecs/vp9_types.h | 6 ++--- src/video_core/command_classes/host1x.cpp | 2 +- src/video_core/command_classes/host1x.h | 2 +- src/video_core/command_classes/nvdec.cpp | 6 +---- src/video_core/command_classes/nvdec.h | 6 ++--- src/video_core/command_classes/vic.cpp | 2 +- src/video_core/command_classes/vic.h | 4 +-- 13 files changed, 59 insertions(+), 62 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 2df410be8..1adf3cd13 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -4,6 +4,7 @@ #include #include +#include #include "common/assert.h" #include "video_core/command_classes/codecs/codec.h" #include "video_core/command_classes/codecs/h264.h" diff --git a/src/video_core/command_classes/codecs/codec.h 
b/src/video_core/command_classes/codecs/codec.h index 2e56daf29..cb67094f6 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -5,8 +5,6 @@ #pragma once #include -#include -#include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/command_classes/nvdec_common.h" diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index 1a39f7b23..54a749e2b 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -18,12 +18,27 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // +#include #include "common/bit_util.h" #include "video_core/command_classes/codecs/h264.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" namespace Tegra::Decoder { +namespace { +// ZigZag LUTs from libavcodec. +constexpr std::array zig_zag_direct{ + 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, + 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, + 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, +}; + +constexpr std::array zig_zag_scan{ + 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, + 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, +}; +} // Anonymous namespace + H264::H264(GPU& gpu_) : gpu(gpu_) {} H264::~H264() = default; diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index 21752dd90..c36a54399 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h @@ -55,18 +55,6 @@ public: const std::vector& GetByteArray() const; private: - // ZigZag LUTs from libavcodec. 
- static constexpr std::array zig_zag_direct{ - 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, - 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, - 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, - }; - - static constexpr std::array zig_zag_scan{ - 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, - 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, - }; - void WriteBits(s32 value, s32 bit_count); void WriteExpGolombCodedInt(s32 value); void WriteExpGolombCodedUInt(u32 value); diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 3bae0bb5d..d888e773a 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -9,7 +9,7 @@ #include "video_core/memory_manager.h" namespace Tegra::Decoder { - +namespace { // Default compressed header probabilities once frame context resets constexpr Vp9EntropyProbs default_probs{ .y_mode_prob{ @@ -170,6 +170,35 @@ constexpr Vp9EntropyProbs default_probs{ .high_precision{128, 128}, }; +constexpr std::array norm_lut{ + 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +constexpr std::array map_lut{ + 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, + 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, + 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124, + 125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, + 143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177, + 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194, + 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212, + 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 17, + 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 19, +}; +} // Anonymous namespace + VP9::VP9(GPU& gpu) : gpu(gpu) {} VP9::~VP9() = default; diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h index 748e11bae..dc52ddbde 100644 --- a/src/video_core/command_classes/codecs/vp9.h +++ b/src/video_core/command_classes/codecs/vp9.h @@ -4,9 +4,9 @@ #pragma once -#include +#include #include -#include "common/common_funcs.h" + #include "common/common_types.h" #include "common/stream.h" #include "video_core/command_classes/codecs/vp9_types.h" @@ -52,17 +52,6 @@ private: u32 range{0xff}; s32 count{-24}; s32 half_probability{128}; - static constexpr std::array norm_lut{ - 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3, - 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; }; class VpxBitStreamWriter { @@ -193,23 +182,6 @@ private: s32 diff_update_probability = 252; s32 frame_sync_code = 0x498342; - static constexpr std::array map_lut = { - 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, - 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, - 67, 68, 69, 70, 71, 72, 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, - 83, 84, 85, 5, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 6, - 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 7, 110, 111, 112, 113, - 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124, 125, 126, 127, 128, 129, - 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, - 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159, 160, - 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, - 177, 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, - 193, 14, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, - 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, - 224, 225, 226, 227, 228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 
241, 18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19, - }; }; } // namespace Decoder diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 8688fdac0..a50acf6e8 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -4,13 +4,11 @@ #pragma once -#include -#include +#include +#include #include -#include "common/cityhash.h" #include "common/common_funcs.h" #include "common/common_types.h" -#include "video_core/command_classes/nvdec_common.h" namespace Tegra { class GPU; diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp index a5234ee47..c4dd4881a 100644 --- a/src/video_core/command_classes/host1x.cpp +++ b/src/video_core/command_classes/host1x.cpp @@ -15,7 +15,7 @@ void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) { std::memcpy(state_offset, &arguments, sizeof(u32)); } -void Tegra::Host1x::ProcessMethod(Host1x::Method method, const std::vector& arguments) { +void Tegra::Host1x::ProcessMethod(Method method, const std::vector& arguments) { StateWrite(static_cast(method), arguments[0]); switch (method) { case Method::WaitSyncpt: diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h index 501a5ed2e..013eaa0c1 100644 --- a/src/video_core/command_classes/host1x.h +++ b/src/video_core/command_classes/host1x.h @@ -61,7 +61,7 @@ public: ~Host1x(); /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Host1x::Method method, const std::vector& arguments); + void ProcessMethod(Method method, const std::vector& arguments); private: /// For Host1x, execute is waiting on a syncpoint previously written into the state diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp index ede9466eb..8ca7a7b06 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ 
b/src/video_core/command_classes/nvdec.cpp @@ -2,13 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include #include "common/assert.h" -#include "common/bit_util.h" -#include "core/memory.h" #include "video_core/command_classes/nvdec.h" #include "video_core/gpu.h" -#include "video_core/memory_manager.h" namespace Tegra { @@ -16,7 +12,7 @@ Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique(gpu)) {} Nvdec::~Nvdec() = default; -void Nvdec::ProcessMethod(Nvdec::Method method, const std::vector& arguments) { +void Nvdec::ProcessMethod(Method method, const std::vector& arguments) { if (method == Method::SetVideoCodec) { codec->StateWrite(static_cast(method), arguments[0]); } else { diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h index c1a9d843e..af14f9857 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/command_classes/nvdec.h @@ -4,8 +4,8 @@ #pragma once +#include #include -#include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/command_classes/codecs/codec.h" @@ -23,7 +23,7 @@ public: ~Nvdec(); /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Nvdec::Method method, const std::vector& arguments); + void ProcessMethod(Method method, const std::vector& arguments); /// Return most recently decoded frame AVFrame* GetFrame(); @@ -34,6 +34,6 @@ private: void Execute(); GPU& gpu; - std::unique_ptr codec; + std::unique_ptr codec; }; } // namespace Tegra diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 66e15a1a8..5b52da277 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -26,7 +26,7 @@ void Vic::VicStateWrite(u32 offset, u32 arguments) { std::memcpy(state_offset, &arguments, sizeof(u32)); } -void Vic::ProcessMethod(Vic::Method method, const std::vector& arguments) { +void 
Vic::ProcessMethod(Method method, const std::vector& arguments) { LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast(method)); VicStateWrite(static_cast(method), arguments[0]); const u64 arg = static_cast(arguments[0]) << 8; diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h index dd0a2aed8..8c4e284a1 100644 --- a/src/video_core/command_classes/vic.h +++ b/src/video_core/command_classes/vic.h @@ -63,11 +63,11 @@ public: SetOutputSurfaceChromaVOffset = 0x1ca }; - explicit Vic(GPU& gpu, std::shared_ptr nvdec_processor); + explicit Vic(GPU& gpu, std::shared_ptr nvdec_processor); ~Vic(); /// Write to the device state. - void ProcessMethod(Vic::Method method, const std::vector& arguments); + void ProcessMethod(Method method, const std::vector& arguments); private: void Execute(); -- cgit v1.2.3 From dcc26c54a52eb6fe2fc5fcb7a4b34bc1a4ad4789 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 27 Oct 2020 02:20:17 -0400 Subject: vp9: Resolve variable shadowing --- src/video_core/command_classes/codecs/vp9.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index d888e773a..04e015ae8 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -415,7 +415,7 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) frame.info.bitstream_size); } // Buffer two frames, saving the last show frame info - if (next_next_frame.bit_stream.size() != 0) { + if (!next_next_frame.bit_stream.empty()) { Vp9FrameContainer temp{ .info = frame.info, .bit_stream = frame.bit_stream, @@ -425,15 +425,15 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) frame.bit_stream = next_next_frame.bit_stream; next_next_frame = std::move(temp); - if (next_frame.bit_stream.size() != 0) { - Vp9FrameContainer temp{ + 
if (!next_frame.bit_stream.empty()) { + Vp9FrameContainer temp2{ .info = frame.info, .bit_stream = frame.bit_stream, }; next_frame.info.show_frame = frame.info.last_frame_shown; frame.info = next_frame.info; frame.bit_stream = next_frame.bit_stream; - next_frame = std::move(temp); + next_frame = std::move(temp2); } else { next_frame.info = frame.info; next_frame.bit_stream = frame.bit_stream; -- cgit v1.2.3 From 3b5d5fa86fb2ffe2e78eab86e4a8db189f69f0fa Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 27 Oct 2020 02:20:41 -0400 Subject: vp9: Remove pessimizing moves The move will already occur without std::move. --- src/video_core/command_classes/codecs/vp9.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 04e015ae8..0315a80bc 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -395,7 +395,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) // to avoid buffering frame data needed for reference frame updating in the header composition. 
std::memcpy(vp9_info.frame_offsets.data(), state.surface_luma_offset.data(), 4 * sizeof(u64)); - return std::move(vp9_info); + return vp9_info; } void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { @@ -408,7 +408,7 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) Vp9FrameContainer frame{}; { gpu.SyncGuestHost(); - frame.info = std::move(GetVp9PictureInfo(state)); + frame.info = GetVp9PictureInfo(state); frame.bit_stream.resize(frame.info.bitstream_size); gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(), -- cgit v1.2.3 From 111802bbbb84e1713a6a093ca334009f709d02ed Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 27 Oct 2020 02:26:03 -0400 Subject: vp9: Join declarations with assignments --- src/video_core/command_classes/codecs/vp9.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 0315a80bc..380180975 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -677,7 +677,6 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { current_frame_info.intra_only = true; } else { - std::array ref_frame_index; if (!current_frame_info.show_frame) { uncomp_writer.WriteBit(current_frame_info.intra_only); @@ -692,9 +691,9 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { } // Last, Golden, Altref frames - ref_frame_index = std::array{0, 1, 2}; + std::array ref_frame_index{0, 1, 2}; - // set when next frame is hidden + // Set when next frame is hidden // altref and golden references are swapped if (swap_next_golden) { ref_frame_index = std::array{0, 2, 1}; @@ -783,17 +782,19 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) { const s8 old_deltas = loop_filter_ref_deltas[index]; const s8 
new_deltas = current_frame_info.ref_deltas[index]; + const bool differing_delta = old_deltas != new_deltas; - loop_filter_delta_update |= - (update_loop_filter_ref_deltas[index] = old_deltas != new_deltas); + update_loop_filter_ref_deltas[index] = differing_delta; + loop_filter_delta_update |= differing_delta; } for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) { const s8 old_deltas = loop_filter_mode_deltas[index]; const s8 new_deltas = current_frame_info.mode_deltas[index]; + const bool differing_delta = old_deltas != new_deltas; - loop_filter_delta_update |= - (update_loop_filter_mode_deltas[index] = old_deltas != new_deltas); + update_loop_filter_mode_deltas[index] = differing_delta; + loop_filter_delta_update |= differing_delta; } uncomp_writer.WriteBit(loop_filter_delta_update); -- cgit v1.2.3 From 00decfbb073311c04a247c3cd0a08d5307d4a2e3 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 27 Oct 2020 02:26:17 -0400 Subject: vp9: Remove dead code --- src/video_core/command_classes/codecs/vp9.cpp | 6 ------ 1 file changed, 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 380180975..2a3bdb7e7 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -634,12 +634,6 @@ std::vector VP9::ComposeCompressedHeader() { writer.End(); return writer.GetBuffer(); - - const auto writer_bytearray = writer.GetBuffer(); - - std::vector compressed_header(writer_bytearray.size()); - std::memcpy(compressed_header.data(), writer_bytearray.data(), writer_bytearray.size()); - return compressed_header; } VpxBitStreamWriter VP9::ComposeUncompressedHeader() { -- cgit v1.2.3 From 6291975731f6237de3edc81a96aca87cdae01000 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 27 Oct 2020 02:27:29 -0400 Subject: vp9: std::move buffer within ComposeFrameHeader() We can move the buffer here to avoid a 
heap reallocation --- src/video_core/command_classes/codecs/vp9.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 2a3bdb7e7..bcbea9c05 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -853,7 +853,7 @@ std::vector& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) { { Vp9FrameContainer curr_frame = GetCurrentFrame(state); current_frame_info = curr_frame.info; - bitstream = curr_frame.bit_stream; + bitstream = std::move(curr_frame.bit_stream); } // The uncompressed header routine sets PrevProb parameters needed for the compressed header -- cgit v1.2.3 From cce14b4cd75fd3bbab548375a0a547ec499283e2 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 27 Oct 2020 02:50:03 -0400 Subject: h264: Make WriteUe take a u32 Enforces the type of the desired value in calling code. --- src/video_core/command_classes/codecs/h264.cpp | 13 +++++++------ src/video_core/command_classes/codecs/h264.h | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index 54a749e2b..549a40f52 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -63,7 +63,8 @@ std::vector& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo writer.WriteU(0, 8); writer.WriteU(31, 8); writer.WriteUe(0); - const s32 chroma_format_idc = (context.h264_parameter_set.flags >> 12) & 0x3; + const auto chroma_format_idc = + static_cast((context.h264_parameter_set.flags >> 12) & 3); writer.WriteUe(chroma_format_idc); if (chroma_format_idc == 3) { writer.WriteBit(false); @@ -74,8 +75,8 @@ std::vector& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo writer.WriteBit(false); // 
QpprimeYZeroTransformBypassFlag writer.WriteBit(false); // Scaling matrix present flag - const s32 order_cnt_type = static_cast((context.h264_parameter_set.flags >> 14) & 3); - writer.WriteUe(static_cast((context.h264_parameter_set.flags >> 8) & 0xf)); + const auto order_cnt_type = static_cast((context.h264_parameter_set.flags >> 14) & 3); + writer.WriteUe(static_cast((context.h264_parameter_set.flags >> 8) & 0xf)); writer.WriteUe(order_cnt_type); if (order_cnt_type == 0) { writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt); @@ -115,7 +116,7 @@ std::vector& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo writer.WriteUe(0); writer.WriteUe(0); - writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag); + writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); writer.WriteBit(false); writer.WriteUe(0); writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); @@ -187,8 +188,8 @@ void H264BitWriter::WriteSe(s32 value) { WriteExpGolombCodedInt(value); } -void H264BitWriter::WriteUe(s32 value) { - WriteExpGolombCodedUInt((u32)value); +void H264BitWriter::WriteUe(u32 value) { + WriteExpGolombCodedUInt(value); } void H264BitWriter::End() { diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index c36a54399..f2292fd2f 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h @@ -38,7 +38,7 @@ public: /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax void WriteU(s32 value, s32 value_sz); void WriteSe(s32 value); - void WriteUe(s32 value); + void WriteUe(u32 value); /// Finalize the bitstream void End(); -- cgit v1.2.3 From 047e77e2f0768775c765d8098ee8475018a06270 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 27 Oct 2020 03:02:39 -0400 Subject: sync_manager: Amend parameter order of calls to SyncptIncr constructor Corrects some cases where the arguments would be incorrectly swapped. 
--- src/video_core/command_classes/sync_manager.cpp | 14 +++++++------- src/video_core/command_classes/sync_manager.h | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp index a0ab44855..19dc9e0ab 100644 --- a/src/video_core/command_classes/sync_manager.cpp +++ b/src/video_core/command_classes/sync_manager.cpp @@ -27,22 +27,22 @@ SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {} SyncptIncrManager::~SyncptIncrManager() = default; void SyncptIncrManager::Increment(u32 id) { - increments.push_back(SyncptIncr{0, id, true}); + increments.emplace_back(0, 0, id, true); IncrementAllDone(); } u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) { const u32 handle = current_id++; - increments.push_back(SyncptIncr{handle, class_id, id}); + increments.emplace_back(handle, class_id, id); return handle; } void SyncptIncrManager::SignalDone(u32 handle) { - auto done_incr = std::find_if(increments.begin(), increments.end(), - [handle](SyncptIncr incr) { return incr.id == handle; }); - if (done_incr != increments.end()) { - const SyncptIncr incr = *done_incr; - *done_incr = SyncptIncr{incr.id, incr.class_id, incr.syncpt_id, true}; + const auto done_incr = + std::find_if(increments.begin(), increments.end(), + [handle](const SyncptIncr& incr) { return incr.id == handle; }); + if (done_incr != increments.cend()) { + done_incr->complete = true; } IncrementAllDone(); } diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/command_classes/sync_manager.h index 353b67573..2c321ec58 100644 --- a/src/video_core/command_classes/sync_manager.h +++ b/src/video_core/command_classes/sync_manager.h @@ -32,8 +32,8 @@ struct SyncptIncr { u32 syncpt_id; bool complete; - SyncptIncr(u32 id, u32 syncpt_id_, u32 class_id_, bool done = false) - : id(id), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {} 
+ SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false) + : id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {} }; class SyncptIncrManager { -- cgit v1.2.3 From 4a451e584917ec89fa593068bacd6b46a96e8fb5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 28 Oct 2020 02:31:30 -0300 Subject: video_core: Enforce -Werror=type-limits Silences one warning and avoids introducing more in the future. --- src/video_core/CMakeLists.txt | 1 + src/video_core/command_classes/codecs/vp9.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index fdfc885fc..15bd92f09 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -303,6 +303,7 @@ else() -Werror=conversion -Wno-error=sign-conversion -Werror=switch + -Werror=type-limits -Werror=unused-variable $<$:-Werror=class-memaccess> diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 3bae0bb5d..c47a0e060 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -893,7 +893,7 @@ void VpxRangeEncoder::Write(bool bit, s32 probability) { if (((low_value << (offset - 1)) >> 31) != 0) { const s32 current_pos = static_cast(base_stream.GetPosition()); base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos); - while (base_stream.GetPosition() >= 0 && PeekByte() == 0xff) { + while (PeekByte() == 0xff) { base_stream.WriteByte(0); base_stream.Seek(-2, Common::SeekOrigin::FromCurrentPos); -- cgit v1.2.3 From 79da90cea87627c4cdea994f1987f9f1efa123f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 28 Oct 2020 02:25:40 -0300 Subject: video_core: Enforce -Wredundant-move and -Wpessimizing-move Silence three warnings and make them errors to avoid introducing more in the future. 
--- src/video_core/CMakeLists.txt | 2 ++ src/video_core/command_classes/codecs/vp9.cpp | 2 +- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 3 +-- src/video_core/renderer_vulkan/wrapper.cpp | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index fdfc885fc..567084241 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -302,6 +302,8 @@ else() target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion + -Werror=pessimizing-move + -Werror=redundant-move -Werror=switch -Werror=unused-variable diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 3bae0bb5d..747c63d85 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -366,7 +366,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) // to avoid buffering frame data needed for reference frame updating in the header composition. 
std::memcpy(vp9_info.frame_offsets.data(), state.surface_luma_offset.data(), 4 * sizeof(u64)); - return std::move(vp9_info); + return vp9_info; } void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 166ee34e1..70dd0c3c6 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -317,8 +317,7 @@ std::optional> ShaderDiskCacheOpenGL::Lo return std::nullopt; } } - - return std::move(entries); + return entries; } void ShaderDiskCacheOpenGL::InvalidateTransferable() { diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index c034558a3..4e83303d8 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -844,7 +844,7 @@ std::optional> EnumerateInstanceExtensionProp VK_SUCCESS) { return std::nullopt; } - return std::move(properties); + return properties; } std::optional> EnumerateInstanceLayerProperties( -- cgit v1.2.3 From 44b552be712a9db83a92710648cde4053adf876d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 28 Oct 2020 17:05:41 -0300 Subject: shader/arithmetic: Implement FCMP immediate + register variant Trivially add the encoding for this. 
--- src/video_core/engines/shader_bytecode.h | 2 ++ src/video_core/shader/decode/arithmetic.cpp | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index d374b73cf..a3c05d1b0 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1893,6 +1893,7 @@ public: ICMP_IMM, FCMP_RR, FCMP_RC, + FCMP_IMMR, MUFU, // Multi-Function Operator RRO_C, // Range Reduction Operator RRO_R, @@ -2205,6 +2206,7 @@ private: INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), + INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 4db329fa5..afef5948d 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -137,7 +137,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::FCMP_RR: - case OpCode::Id::FCMP_RC: { + case OpCode::Id::FCMP_RC: + case OpCode::Id::FCMP_IMMR: { UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); Node op_c = GetRegister(instr.gpr39); Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); -- cgit v1.2.3 From 657771bdcbe2a35277e8d7e453b00ab21a7f27f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 28 Oct 2020 17:11:24 -0300 Subject: shader: Partially implement texture cube array shadow This implements texture cube arrays with shadow comparisons but doesn't fix the asserts related to it. 
Fixes out of bounds reads on swizzle constructors and makes them use bounds checked ::at instead of the unsafe operator[]. --- .../renderer_opengl/gl_arb_decompiler.cpp | 41 +++++++++++++--------- .../renderer_opengl/gl_shader_decompiler.cpp | 20 ++++++----- src/video_core/shader/decode/texture.cpp | 1 - 3 files changed, 37 insertions(+), 25 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index f4db62787..d6120c23e 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -39,8 +39,8 @@ using Operation = const OperationNode&; constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; char Swizzle(std::size_t component) { - ASSERT(component < 4); - return component["xyzw"]; + static constexpr std::string_view SWIZZLE{"xyzw"}; + return SWIZZLE.at(component); } constexpr bool IsGenericAttribute(Attribute::Index index) { @@ -224,7 +224,7 @@ private: std::string Visit(const Node& node); - std::pair BuildCoords(Operation); + std::tuple BuildCoords(Operation); std::string BuildAoffi(Operation); std::string GlobalMemoryPointer(const GmemNode& gmem); void Exit(); @@ -1416,12 +1416,12 @@ std::string ARBDecompiler::Visit(const Node& node) { return {}; } -std::pair ARBDecompiler::BuildCoords(Operation operation) { +std::tuple ARBDecompiler::BuildCoords(Operation operation) { const auto& meta = std::get(operation.GetMeta()); UNIMPLEMENTED_IF(meta.sampler.is_indexed); - UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array && - meta.sampler.type == Tegra::Shader::TextureType::TextureCube); + const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array && + meta.sampler.type == Tegra::Shader::TextureType::TextureCube; const std::size_t count = operation.GetOperandsCount(); std::string temporary = AllocVectorTemporary(); std::size_t i = 0; @@ -1429,12 
+1429,21 @@ std::pair ARBDecompiler::BuildCoords(Operation operati AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); } if (meta.sampler.is_array) { - AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array)); + AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array)); + ++i; } if (meta.sampler.is_shadow) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare)); + std::string compare = Visit(meta.depth_compare); + if (is_extended) { + ASSERT(i == 4); + std::string extra_coord = AllocVectorTemporary(); + AddLine("MOV.F {}.x, {};", extra_coord, compare); + return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0}; + } + AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare); + ++i; } - return {std::move(temporary), i}; + return {temporary, temporary, i}; } std::string ARBDecompiler::BuildAoffi(Operation operation) { @@ -1859,7 +1868,7 @@ std::string ARBDecompiler::LogicalAddCarry(Operation operation) { std::string ARBDecompiler::Texture(Operation operation) { const auto& meta = std::get(operation.GetMeta()); const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [temporary, swizzle] = BuildCoords(operation); + const auto [coords, temporary, swizzle] = BuildCoords(operation); std::string_view opcode = "TEX"; std::string extra; @@ -1888,7 +1897,7 @@ std::string ARBDecompiler::Texture(Operation operation) { } } - AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id, + AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id, TextureType(meta), BuildAoffi(operation)); AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); return fmt::format("{}.x", temporary); @@ -1897,7 +1906,7 @@ std::string ARBDecompiler::Texture(Operation operation) { std::string ARBDecompiler::TextureGather(Operation operation) { const auto& meta = 
std::get(operation.GetMeta()); const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [temporary, swizzle] = BuildCoords(operation); + const auto [coords, temporary, swizzle] = BuildCoords(operation); std::string comp; if (!meta.sampler.is_shadow) { @@ -1907,7 +1916,7 @@ std::string ARBDecompiler::TextureGather(Operation operation) { AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); + AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element)); return fmt::format("{}.x", temporary); } @@ -1945,13 +1954,13 @@ std::string ARBDecompiler::TextureQueryLod(Operation operation) { std::string ARBDecompiler::TexelFetch(Operation operation) { const auto& meta = std::get(operation.GetMeta()); const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [temporary, swizzle] = BuildCoords(operation); + const auto [coords, temporary, swizzle] = BuildCoords(operation); if (!meta.sampler.is_buffer) { ASSERT(swizzle < 4); AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); } - AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta), + AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta), BuildAoffi(operation)); AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); return fmt::format("{}.x", temporary); @@ -1962,7 +1971,7 @@ std::string ARBDecompiler::TextureGradient(Operation operation) { const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; const std::string ddx = AllocVectorTemporary(); const std::string ddy = AllocVectorTemporary(); - const std::string coord = BuildCoords(operation).first; + const std::string coord = std::get<1>(BuildCoords(operation)); const std::size_t num_components = 
meta.derivates.size() / 2; for (std::size_t index = 0; index < num_components; ++index) { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index bbb8fb095..95ca96c8e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2056,15 +2056,19 @@ private: } Expression Texture(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - std::string expr = GenerateTexture( - operation, "", {TextureOffset{}, TextureArgument{Type::Float, meta->bias}}); - if (meta->sampler.is_shadow) { - expr = "vec4(" + expr + ')'; + const auto meta = std::get(operation.GetMeta()); + const bool separate_dc = meta.sampler.type == TextureType::TextureCube && + meta.sampler.is_array && meta.sampler.is_shadow; + // TODO: Replace this with an array and make GenerateTexture use C++20 std::span + const std::vector extras{ + TextureOffset{}, + TextureArgument{Type::Float, meta.bias}, + }; + std::string expr = GenerateTexture(operation, "", extras, separate_dc); + if (meta.sampler.is_shadow) { + expr = fmt::format("vec4({})", expr); } - return {expr + GetSwizzle(meta->element), Type::Float}; + return {expr + GetSwizzle(meta.element), Type::Float}; } Expression TextureLod(Operation operation) { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 4e932a4b6..02fdccd86 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -556,7 +556,6 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, const bool is_shadow = depth_compare != nullptr; const bool is_bindless = bindless_reg.has_value(); - UNIMPLEMENTED_IF(texture_type == TextureType::TextureCube && is_array && is_shadow); ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, "Illegal texture type"); -- cgit v1.2.3 
From 94eca09cf6541634a885526cf0a850fc4fb1e56a Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 28 Oct 2020 16:03:35 -0700 Subject: video_core: cdma_pusher: Add missing LOG_DEBUG field in ExecuteCommand. --- src/video_core/cdma_pusher.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index d774db107..b60f86260 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -144,7 +144,7 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { } case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})", - static_cast(vic_thi_state.method_0)); + static_cast(vic_thi_state.method_0), data); vic_processor->ProcessMethod(static_cast(vic_thi_state.method_0), {data}); break; -- cgit v1.2.3 From 362020613672e9b260552807a3edf8cb58c23dee Mon Sep 17 00:00:00 2001 From: ameerj Date: Thu, 29 Oct 2020 14:16:45 -0400 Subject: async_shaders: Increase Async worker thread count for 8+ thread cpus Adds 1 async worker thread for every 2 available threads above 8 --- src/video_core/shader/async_shaders.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index aabd62c5c..39cc3b869 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -20,14 +20,15 @@ AsyncShaders::~AsyncShaders() { } void AsyncShaders::AllocateWorkers() { - // Max worker threads we should allow - constexpr u32 MAX_THREADS = 4; - // Deduce how many threads we can use - const u32 threads_used = std::thread::hardware_concurrency() / 4; - // Always allow at least 1 thread regardless of our settings - const auto max_worker_count = std::max(1U, threads_used); - // Don't use more than MAX_THREADS - const auto num_workers = std::min(max_worker_count, MAX_THREADS); + // Use at 
least one thread + u32 num_workers = 1; + + // Deduce how many more threads we can use + const u32 thread_count = std::thread::hardware_concurrency(); + if (thread_count >= 8) { + // Increase async workers by 1 for every 2 threads >= 8 + num_workers += 1 + (thread_count - 8) / 2; + } // If we already have workers queued, ignore if (num_workers == worker_threads.size()) { -- cgit v1.2.3 From 5553bd3ba22f11f4b989d74ac0e3d46f0e7fb22b Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 29 Oct 2020 01:48:02 -0400 Subject: General: Resolve a few missing initializer warnings Resolves a few -Wmissing-initializer warnings. --- src/video_core/renderer_vulkan/vk_device.cpp | 9 +++++++-- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 ++ src/video_core/texture_cache/surface_params.cpp | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index e1217ca83..f34ed6735 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -771,13 +771,18 @@ void VKDevice::CollectTelemetryParameters() { VkPhysicalDeviceDriverPropertiesKHR driver{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR, .pNext = nullptr, + .driverID = {}, + .driverName = {}, + .driverInfo = {}, + .conformanceVersion = {}, }; - VkPhysicalDeviceProperties2KHR properties{ + VkPhysicalDeviceProperties2KHR device_properties{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, .pNext = &driver, + .properties = {}, }; - physical.GetProperties2KHR(properties); + physical.GetProperties2KHR(device_properties); driver_id = driver.driverID; vendor_name = driver.driverName; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 696eaeb5f..0e8f9c352 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ 
b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -159,6 +159,7 @@ std::vector VKGraphicsPipeline::CreateShaderModules( .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pNext = nullptr, .flags = 0, + .codeSize = 0, }; std::vector modules; @@ -388,6 +389,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa .logicOp = VK_LOGIC_OP_COPY, .attachmentCount = static_cast(num_attachments), .pAttachments = cb_attachments.data(), + .blendConstants = {}, }; std::vector dynamic_states{ diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index e8515321b..13dd16356 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -240,6 +240,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( .is_tiled = is_tiled, .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB, + .is_layered = false, .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U, .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U, .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U, -- cgit v1.2.3 From f8543249f0e96a3148f3ea888bfa62882e67f523 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 29 Oct 2020 22:34:44 -0400 Subject: vp9: Make some member functions internally linked These helper functions don't directly modify any member state and can be hidden from view. 
--- src/video_core/command_classes/codecs/vp9.cpp | 102 ++++++++++++++------------ src/video_core/command_classes/codecs/vp9.h | 10 --- 2 files changed, 54 insertions(+), 58 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index b3e98aa9f..aeb9866de 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -197,6 +197,60 @@ constexpr std::array map_lut{ 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19, }; + +// 6.2.14 Tile size calculation + +s32 CalcMinLog2TileCols(s32 frame_width) { + const s32 sb64_cols = (frame_width + 63) / 64; + s32 min_log2 = 0; + + while ((64 << min_log2) < sb64_cols) { + min_log2++; + } + + return min_log2; +} + +s32 CalcMaxLog2TileCols(s32 frame_width) { + const s32 sb64_cols = (frame_width + 63) / 64; + s32 max_log2 = 1; + + while ((sb64_cols >> max_log2) >= 4) { + max_log2++; + } + + return max_log2 - 1; +} + +// Recenters probability. Based on section 6.3.6 of VP9 Specification +s32 RecenterNonNeg(s32 new_prob, s32 old_prob) { + if (new_prob > old_prob * 2) { + return new_prob; + } + + if (new_prob >= old_prob) { + return (new_prob - old_prob) * 2; + } + + return (old_prob - new_prob) * 2 - 1; +} + +// Adjusts old_prob depending on new_prob. 
Based on section 6.3.5 of VP9 Specification +s32 RemapProbability(s32 new_prob, s32 old_prob) { + new_prob--; + old_prob--; + + std::size_t index{}; + + if (old_prob * 2 <= 0xff) { + index = static_cast(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1)); + } else { + index = static_cast( + std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1)); + } + + return map_lut[index]; +} } // Anonymous namespace VP9::VP9(GPU& gpu) : gpu(gpu) {} @@ -236,32 +290,6 @@ void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_pro EncodeTermSubExp(writer, delta); } -s32 VP9::RemapProbability(s32 new_prob, s32 old_prob) { - new_prob--; - old_prob--; - - std::size_t index{}; - - if (old_prob * 2 <= 0xff) { - index = static_cast(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1)); - } else { - index = static_cast( - std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1)); - } - - return map_lut[index]; -} - -s32 VP9::RecenterNonNeg(s32 new_prob, s32 old_prob) { - if (new_prob > old_prob * 2) { - return new_prob; - } else if (new_prob >= old_prob) { - return (new_prob - old_prob) * 2; - } else { - return (old_prob - new_prob) * 2 - 1; - } -} - void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) { if (WriteLessThan(writer, value, 16)) { writer.Write(value, 4); @@ -361,28 +389,6 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ } } -s32 VP9::CalcMinLog2TileCols(s32 frame_width) { - const s32 sb64_cols = (frame_width + 63) / 64; - s32 min_log2 = 0; - - while ((64 << min_log2) < sb64_cols) { - min_log2++; - } - - return min_log2; -} - -s32 VP9::CalcMaxLog2TileCols(s32 frameWidth) { - const s32 sb64_cols = (frameWidth + 63) / 64; - s32 max_log2 = 1; - - while ((sb64_cols >> max_log2) >= 4) { - max_log2++; - } - - return max_log2 - 1; -} - Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { PictureInfo picture_info{}; 
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h index dc52ddbde..3826f2c95 100644 --- a/src/video_core/command_classes/codecs/vp9.h +++ b/src/video_core/command_classes/codecs/vp9.h @@ -121,12 +121,6 @@ private: /// Generates compressed header probability deltas in the bitstream writer void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); - /// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification - s32 RemapProbability(s32 new_prob, s32 old_prob); - - /// Recenters probability. Based on section 6.3.6 of VP9 Specification - s32 RecenterNonNeg(s32 new_prob, s32 old_prob); - /// Inverse of 6.3.4 Decode term subexp void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value); @@ -146,10 +140,6 @@ private: /// Write motion vector probability updates. 6.3.17 in the spec void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); - /// 6.2.14 Tile size calculation - s32 CalcMinLog2TileCols(s32 frame_width); - s32 CalcMaxLog2TileCols(s32 frame_width); - /// Returns VP9 information from NVDEC provided offset and size Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state); -- cgit v1.2.3 From badea3b30134b02c6502c8174719f2c984e37524 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 29 Oct 2020 22:35:52 -0400 Subject: vp9: Provide a default initializer for "hidden" member The API of VP9 exposes a WasFrameHidden() function which accesses this member. Given the constructor previously didn't initialize this member, it's a potential vector for an uninitialized read. Instead, we can initialize this to a deterministic value to prevent that from occurring. 
--- src/video_core/command_classes/codecs/vp9.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h index 3826f2c95..94e8f9090 100644 --- a/src/video_core/command_classes/codecs/vp9.h +++ b/src/video_core/command_classes/codecs/vp9.h @@ -159,7 +159,7 @@ private: std::array loop_filter_ref_deltas{}; std::array loop_filter_mode_deltas{}; - bool hidden; + bool hidden = false; s64 current_frame_number = -2; // since we buffer 2 frames s32 grace_period = 6; // frame offsets need to stabilize std::array frame_ctxs{}; -- cgit v1.2.3 From 0d713cf8eb8c8b8802584c73b83d5ca9d88c70b2 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 29 Oct 2020 22:40:46 -0400 Subject: vp9: Mark functions with [[nodiscard]] where applicable Prevents values from mistakenly being discarded in cases where it's a bug to do so. --- src/video_core/command_classes/codecs/vp9.cpp | 8 ++++---- src/video_core/command_classes/codecs/vp9.h | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index aeb9866de..42520f856 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -200,7 +200,7 @@ constexpr std::array map_lut{ // 6.2.14 Tile size calculation -s32 CalcMinLog2TileCols(s32 frame_width) { +[[nodiscard]] s32 CalcMinLog2TileCols(s32 frame_width) { const s32 sb64_cols = (frame_width + 63) / 64; s32 min_log2 = 0; @@ -211,7 +211,7 @@ s32 CalcMinLog2TileCols(s32 frame_width) { return min_log2; } -s32 CalcMaxLog2TileCols(s32 frame_width) { +[[nodiscard]] s32 CalcMaxLog2TileCols(s32 frame_width) { const s32 sb64_cols = (frame_width + 63) / 64; s32 max_log2 = 1; @@ -223,7 +223,7 @@ s32 CalcMaxLog2TileCols(s32 frame_width) { } // Recenters probability. 
Based on section 6.3.6 of VP9 Specification -s32 RecenterNonNeg(s32 new_prob, s32 old_prob) { +[[nodiscard]] s32 RecenterNonNeg(s32 new_prob, s32 old_prob) { if (new_prob > old_prob * 2) { return new_prob; } @@ -236,7 +236,7 @@ s32 RecenterNonNeg(s32 new_prob, s32 old_prob) { } // Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification -s32 RemapProbability(s32 new_prob, s32 old_prob) { +[[nodiscard]] s32 RemapProbability(s32 new_prob, s32 old_prob) { new_prob--; old_prob--; diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h index 94e8f9090..76b5a8283 100644 --- a/src/video_core/command_classes/codecs/vp9.h +++ b/src/video_core/command_classes/codecs/vp9.h @@ -37,11 +37,11 @@ public: /// Signal the end of the bitstream void End(); - std::vector& GetBuffer() { + [[nodiscard]] std::vector& GetBuffer() { return base_stream.GetBuffer(); } - const std::vector& GetBuffer() const { + [[nodiscard]] const std::vector& GetBuffer() const { return base_stream.GetBuffer(); } @@ -75,10 +75,10 @@ public: void Flush(); /// Returns byte_array - std::vector& GetByteArray(); + [[nodiscard]] std::vector& GetByteArray(); /// Returns const byte_array - const std::vector& GetByteArray() const; + [[nodiscard]] const std::vector& GetByteArray() const; private: /// Write bit_count bits from value into buffer @@ -104,7 +104,7 @@ public: std::vector& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state); /// Returns true if the most recent frame was a hidden frame. 
- bool WasFrameHidden() const { + [[nodiscard]] bool WasFrameHidden() const { return hidden; } @@ -141,17 +141,17 @@ private: void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); /// Returns VP9 information from NVDEC provided offset and size - Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state); + [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state); /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct void InsertEntropy(u64 offset, Vp9EntropyProbs& dst); /// Returns frame to be decoded after buffering - Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state); + [[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state); /// Use NVDEC providied information to compose the headers for the current frame - std::vector ComposeCompressedHeader(); - VpxBitStreamWriter ComposeUncompressedHeader(); + [[nodiscard]] std::vector ComposeCompressedHeader(); + [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader(); GPU& gpu; std::vector frame; -- cgit v1.2.3 From 12eeffcb7c7d9d97ee55c96a760dd1f655c1d507 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 29 Oct 2020 22:45:33 -0400 Subject: vp9: Be explicit with copy and move operators It's deprecated in the language to autogenerate these if the destructor for a type is specified, so we can explicitly specify how we want these to be generated. 
--- src/video_core/command_classes/codecs/vp9.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h index 76b5a8283..05c9682fa 100644 --- a/src/video_core/command_classes/codecs/vp9.h +++ b/src/video_core/command_classes/codecs/vp9.h @@ -25,6 +25,12 @@ public: VpxRangeEncoder(); ~VpxRangeEncoder(); + VpxRangeEncoder(const VpxRangeEncoder&) = delete; + VpxRangeEncoder& operator=(const VpxRangeEncoder&) = delete; + + VpxRangeEncoder(VpxRangeEncoder&&) = default; + VpxRangeEncoder& operator=(VpxRangeEncoder&&) = default; + /// Writes the rightmost value_size bits from value into the stream void Write(s32 value, s32 value_size); @@ -59,6 +65,12 @@ public: VpxBitStreamWriter(); ~VpxBitStreamWriter(); + VpxBitStreamWriter(const VpxBitStreamWriter&) = delete; + VpxBitStreamWriter& operator=(const VpxBitStreamWriter&) = delete; + + VpxBitStreamWriter(VpxBitStreamWriter&&) = default; + VpxBitStreamWriter& operator=(VpxBitStreamWriter&&) = default; + /// Write an unsigned integer value void WriteU(u32 value, u32 value_size); @@ -99,6 +111,12 @@ public: explicit VP9(GPU& gpu); ~VP9(); + VP9(const VP9&) = delete; + VP9& operator=(const VP9&) = delete; + + VP9(VP9&&) = default; + VP9& operator=(VP9&&) = delete; + /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec /// documentation std::vector& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state); -- cgit v1.2.3 From 6053b955525be69eb73a928a7bdd43ba8f5e69a7 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 26 Oct 2020 22:11:41 -0700 Subject: video_core: gpu: Implement WaitFence and IncrementSyncPoint. 
--- src/video_core/dma_pusher.h | 25 +++++++++++++++++++++++ src/video_core/gpu.cpp | 48 ++++++++++++++++++++++----------------------- src/video_core/gpu.h | 25 +++++++++++++++++++---- 3 files changed, 70 insertions(+), 28 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index efa90d170..2026b7857 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -27,6 +27,31 @@ enum class SubmissionMode : u32 { IncreaseOnce = 5 }; +// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence +// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. +// So the values you see in docs might be multiplied by 4. +enum class BufferMethods : u32 { + BindObject = 0x0, + Nop = 0x2, + SemaphoreAddressHigh = 0x4, + SemaphoreAddressLow = 0x5, + SemaphoreSequence = 0x6, + SemaphoreTrigger = 0x7, + NotifyIntr = 0x8, + WrcacheFlush = 0x9, + Unk28 = 0xA, + UnkCacheFlush = 0xB, + RefCnt = 0x14, + SemaphoreAcquire = 0x1A, + SemaphoreRelease = 0x1B, + FenceValue = 0x1C, + FenceAction = 0x1D, + WaitForInterrupt = 0x1E, + Unk7c = 0x1F, + Yield = 0x20, + NonPullerMethods = 0x40, +}; + struct CommandListHeader { union { u64 raw; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 171f78183..ebd149c3a 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -194,30 +194,6 @@ void GPU::SyncGuestHost() { void GPU::OnCommandListEnd() { renderer->Rasterizer().ReleaseFences(); } -// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence -// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. -// So the values you see in docs might be multiplied by 4. 
-enum class BufferMethods { - BindObject = 0x0, - Nop = 0x2, - SemaphoreAddressHigh = 0x4, - SemaphoreAddressLow = 0x5, - SemaphoreSequence = 0x6, - SemaphoreTrigger = 0x7, - NotifyIntr = 0x8, - WrcacheFlush = 0x9, - Unk28 = 0xA, - UnkCacheFlush = 0xB, - RefCnt = 0x14, - SemaphoreAcquire = 0x1A, - SemaphoreRelease = 0x1B, - FenceValue = 0x1C, - FenceAction = 0x1D, - Unk78 = 0x1E, - Unk7c = 0x1F, - Yield = 0x20, - NonPullerMethods = 0x40, -}; enum class GpuSemaphoreOperation { AcquireEqual = 0x1, @@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { case BufferMethods::UnkCacheFlush: case BufferMethods::WrcacheFlush: case BufferMethods::FenceValue: + break; case BufferMethods::FenceAction: + ProcessFenceActionMethod(); + break; + case BufferMethods::WaitForInterrupt: + ProcessWaitForInterruptMethod(); break; case BufferMethods::SemaphoreTrigger: { ProcessSemaphoreTriggerMethod(); @@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) { } } +void GPU::ProcessFenceActionMethod() { + switch (regs.fence_action.op) { + case FenceOperation::Acquire: + WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); + break; + case FenceOperation::Increment: + IncrementSyncPoint(regs.fence_action.syncpoint_id); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented operation {}", + static_cast(regs.fence_action.op.Value())); + } +} + +void GPU::ProcessWaitForInterruptMethod() { + // TODO(bunnei) ImplementMe + LOG_WARNING(HW_GPU, "(STUBBED) called"); +} + void GPU::ProcessSemaphoreTriggerMethod() { const auto semaphoreOperationMask = 0xF; const auto op = diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b8c613b11..5444b49f3 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -263,6 +263,24 @@ public: return use_nvdec; } + enum class FenceOperation : u32 { + Acquire = 0, + Increment = 1, + }; + + union FenceAction { + u32 raw; + BitField<0, 1, FenceOperation> op; + BitField<8, 24, u32> syncpoint_id; + + 
static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) { + FenceAction result{}; + result.op.Assign(op); + result.syncpoint_id.Assign(syncpoint_id); + return {result.raw}; + } + }; + struct Regs { static constexpr size_t NUM_REGS = 0x40; @@ -291,10 +309,7 @@ public: u32 semaphore_acquire; u32 semaphore_release; u32 fence_value; - union { - BitField<4, 4, u32> operation; - BitField<8, 8, u32> id; - } fence_action; + FenceAction fence_action; INSERT_UNION_PADDING_WORDS(0xE2); // Puller state @@ -342,6 +357,8 @@ protected: private: void ProcessBindMethod(const MethodCall& method_call); + void ProcessFenceActionMethod(); + void ProcessWaitForInterruptMethod(); void ProcessSemaphoreTriggerMethod(); void ProcessSemaphoreRelease(); void ProcessSemaphoreAcquire(); -- cgit v1.2.3 From c64545d07ae57816bc658ca7c45559d0b0d49f89 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 29 Oct 2020 21:13:04 -0700 Subject: video_core: dma_pusher: Add support for prefetched command lists. --- src/video_core/dma_pusher.cpp | 56 +++++++++++++++++++++++++------------------ src/video_core/dma_pusher.h | 21 ++++++++++++++-- 2 files changed, 52 insertions(+), 25 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index f2f96ac33..9c49c6153 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -45,32 +45,42 @@ bool DmaPusher::Step() { return false; } - const CommandList& command_list{dma_pushbuffer.front()}; - ASSERT_OR_EXECUTE(!command_list.empty(), { - // Somehow the command_list is empty, in order to avoid a crash - // We ignore it and assume its size is 0. + CommandList& command_list{dma_pushbuffer.front()}; + + ASSERT_OR_EXECUTE( + command_list.command_lists.size() || command_list.prefetch_command_list.size(), { + // Somehow the command_list is empty, in order to avoid a crash + // We ignore it and assume its size is 0. 
+ dma_pushbuffer.pop(); + dma_pushbuffer_subindex = 0; + return true; + }); + + if (command_list.prefetch_command_list.size()) { + // Prefetched command list from nvdrv, used for things like synchronization + command_headers = std::move(command_list.prefetch_command_list); dma_pushbuffer.pop(); - dma_pushbuffer_subindex = 0; - return true; - }); - const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]}; - const GPUVAddr dma_get = command_list_header.addr; - - if (dma_pushbuffer_subindex >= command_list.size()) { - // We've gone through the current list, remove it from the queue - dma_pushbuffer.pop(); - dma_pushbuffer_subindex = 0; - } - - if (command_list_header.size == 0) { - return true; - } + } else { + const CommandListHeader command_list_header{ + command_list.command_lists[dma_pushbuffer_subindex]}; + const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++]; + const GPUVAddr dma_get = command_list_header.addr; + + if (dma_pushbuffer_subindex >= command_list.command_lists.size()) { + // We've gone through the current list, remove it from the queue + dma_pushbuffer.pop(); + dma_pushbuffer_subindex = 0; + } - // Push buffer non-empty, read a word - command_headers.resize(command_list_header.size); - gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), - command_list_header.size * sizeof(u32)); + if (command_list_header.size == 0) { + return true; + } + // Push buffer non-empty, read a word + command_headers.resize(command_list_header.size); + gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), + command_list_header.size * sizeof(u32)); + } for (std::size_t index = 0; index < command_headers.size();) { const CommandHeader& command_header = command_headers[index]; diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 2026b7857..99b30ca0d 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -74,9 +74,26 @@ union CommandHeader { 
static_assert(std::is_standard_layout_v, "CommandHeader is not standard layout"); static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); +static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, + SubmissionMode mode) { + CommandHeader result{}; + result.method.Assign(static_cast(method)); + result.arg_count.Assign(arg_count); + result.mode.Assign(mode); + return result; +} + class GPU; -using CommandList = std::vector; +struct CommandList final { + CommandList() = default; + explicit CommandList(std::size_t size) : command_lists(size) {} + explicit CommandList(std::vector&& prefetch_command_list) + : prefetch_command_list{std::move(prefetch_command_list)} {} + + std::vector command_lists; + std::vector prefetch_command_list; +}; /** * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the @@ -85,7 +102,7 @@ using CommandList = std::vector; * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for * details on this implementation. */ -class DmaPusher { +class DmaPusher final { public: explicit DmaPusher(Core::System& system, GPU& gpu); ~DmaPusher(); -- cgit v1.2.3 From c6e1c46ac70bf31b54f756f9611b1cf086b63fb0 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 29 Oct 2020 21:13:48 -0700 Subject: video_core: dma_pusher: Add support for integrity checks. - Log corrupted command lists, rather than crash. --- src/video_core/dma_pusher.cpp | 24 ++++++++++++++++++++++++ src/video_core/dma_pusher.h | 3 +++ 2 files changed, 27 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 9c49c6153..105b85a92 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include "common/cityhash.h" #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" @@ -12,6 +13,20 @@ namespace Tegra { +void CommandList::RefreshIntegrityChecks(GPU& gpu) { + command_list_hashes.resize(command_lists.size()); + + for (std::size_t index = 0; index < command_lists.size(); ++index) { + const CommandListHeader command_list_header = command_lists[index]; + std::vector command_headers(command_list_header.size); + gpu.MemoryManager().ReadBlockUnsafe(command_list_header.addr, command_headers.data(), + command_list_header.size * sizeof(u32)); + command_list_hashes[index] = + Common::CityHash64(reinterpret_cast(command_headers.data()), + command_list_header.size * sizeof(u32)); + } +} + DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} DmaPusher::~DmaPusher() = default; @@ -80,6 +95,15 @@ bool DmaPusher::Step() { command_headers.resize(command_list_header.size); gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), command_list_header.size * sizeof(u32)); + + // Integrity check + const u64 new_hash = Common::CityHash64(reinterpret_cast(command_headers.data()), + command_list_header.size * sizeof(u32)); + if (new_hash != next_hash) { + LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get); + dma_pushbuffer.pop(); + return true; + } } for (std::size_t index = 0; index < command_headers.size();) { const CommandHeader& command_header = command_headers[index]; diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 99b30ca0d..8496ba2da 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -91,7 +91,10 @@ struct CommandList final { explicit CommandList(std::vector&& prefetch_command_list) : prefetch_command_list{std::move(prefetch_command_list)} {} + void RefreshIntegrityChecks(GPU& gpu); + std::vector command_lists; + std::vector command_list_hashes; std::vector prefetch_command_list; }; -- cgit v1.2.3 From 
4f0f481f63d0d5587a6d6d319f9815de87ed79ec Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 30 Oct 2020 16:13:29 -0400 Subject: nvdec: Make use of [[nodiscard]] where applicable Prevents bugs from occurring where the results of a function are accidentally discarded --- src/video_core/command_classes/codecs/codec.h | 6 +++--- src/video_core/command_classes/codecs/h264.cpp | 3 ++- src/video_core/command_classes/codecs/h264.h | 10 +++++----- src/video_core/command_classes/codecs/vp9.cpp | 2 +- src/video_core/command_classes/codecs/vp9.h | 2 +- src/video_core/command_classes/codecs/vp9_types.h | 5 ++--- src/video_core/command_classes/nvdec.h | 4 ++-- 7 files changed, 16 insertions(+), 16 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index cb67094f6..5bbe6a332 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -42,11 +42,11 @@ public: void Decode(); /// Returns most recently decoded frame - AVFrame* GetCurrentFrame(); - const AVFrame* GetCurrentFrame() const; + [[nodiscard]] AVFrame* GetCurrentFrame(); + [[nodiscard]] const AVFrame* GetCurrentFrame() const; /// Returns the value of current_codec - NvdecCommon::VideoCodec GetCurrentCodec() const; + [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; private: bool initialized{}; diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index 549a40f52..33e063e20 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -43,7 +43,8 @@ H264::H264(GPU& gpu_) : gpu(gpu_) {} H264::~H264() = default; -std::vector& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) { +const std::vector& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, + bool is_first_frame) { H264DecoderContext context{}; 
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index f2292fd2f..273449495 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h @@ -51,14 +51,14 @@ public: void WriteScalingList(const std::vector& list, s32 start, s32 count); /// Return the bitstream as a vector. - std::vector& GetByteArray(); - const std::vector& GetByteArray() const; + [[nodiscard]] std::vector& GetByteArray(); + [[nodiscard]] const std::vector& GetByteArray() const; private: void WriteBits(s32 value, s32 bit_count); void WriteExpGolombCodedInt(s32 value); void WriteExpGolombCodedUInt(u32 value); - s32 GetFreeBufferBits(); + [[nodiscard]] s32 GetFreeBufferBits(); void Flush(); s32 buffer_size{8}; @@ -74,8 +74,8 @@ public: ~H264(); /// Compose the H264 header of the frame for FFmpeg decoding - std::vector& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, - bool is_first_frame = false); + [[nodiscard]] const std::vector& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, + bool is_first_frame = false); private: struct H264ParameterSet { diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 42520f856..ab44fdc9e 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -854,7 +854,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { return uncomp_writer; } -std::vector& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) { +const std::vector& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) { std::vector bitstream; { Vp9FrameContainer curr_frame = GetCurrentFrame(state); diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h index 05c9682fa..e2504512c 100644 --- 
a/src/video_core/command_classes/codecs/vp9.h +++ b/src/video_core/command_classes/codecs/vp9.h @@ -119,7 +119,7 @@ public: /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec /// documentation - std::vector& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state); + [[nodiscard]] const std::vector& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state); /// Returns true if the most recent frame was a hidden frame. [[nodiscard]] bool WasFrameHidden() const { diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index a50acf6e8..4f0b05d22 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -231,9 +231,8 @@ struct PictureInfo { u32 surface_params{}; INSERT_PADDING_WORDS(3); - Vp9PictureInfo Convert() const { - - return Vp9PictureInfo{ + [[nodiscard]] Vp9PictureInfo Convert() const { + return { .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0, .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0, .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0, diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h index af14f9857..eec4443f9 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/command_classes/nvdec.h @@ -26,8 +26,8 @@ public: void ProcessMethod(Method method, const std::vector& arguments); /// Return most recently decoded frame - AVFrame* GetFrame(); - const AVFrame* GetFrame() const; + [[nodiscard]] AVFrame* GetFrame(); + [[nodiscard]] const AVFrame* GetFrame() const; private: /// Invoke codec to decode a frame -- cgit v1.2.3 From 6f006d051e1fad075048ea5664e1ef0605e48a46 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 4 Nov 2020 20:41:16 -0500 Subject: General: Fix clang build Allows building on clang to work again --- src/video_core/dma_pusher.h | 7 +++---- src/video_core/gpu.h | 2 +- 2 files changed, 4 
insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 8496ba2da..9d9a750d9 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -18,6 +18,8 @@ class System; namespace Tegra { +class GPU; + enum class SubmissionMode : u32 { IncreasingOld = 0, Increasing = 1, @@ -74,8 +76,7 @@ union CommandHeader { static_assert(std::is_standard_layout_v, "CommandHeader is not standard layout"); static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); -static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, - SubmissionMode mode) { +inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, SubmissionMode mode) { CommandHeader result{}; result.method.Assign(static_cast(method)); result.arg_count.Assign(arg_count); @@ -83,8 +84,6 @@ static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_ return result; } -class GPU; - struct CommandList final { CommandList() = default; explicit CommandList(std::size_t size) : command_lists(size) {} diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 5444b49f3..cf5235a79 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -273,7 +273,7 @@ public: BitField<0, 1, FenceOperation> op; BitField<8, 24, u32> syncpoint_id; - static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) { + static CommandHeader Build(FenceOperation op, u32 syncpoint_id) { FenceAction result{}; result.op.Assign(op); result.syncpoint_id.Assign(syncpoint_id); -- cgit v1.2.3 From dc5396a4668b564b2d1f4488d10581dd67fc22db Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 7 Nov 2020 00:08:19 -0800 Subject: video_core: dma_pusher: Remove integrity check on command lists. - This seems to cause softlocks in Breath of the Wild. 
--- src/video_core/dma_pusher.cpp | 26 +------------------------- src/video_core/dma_pusher.h | 3 --- 2 files changed, 1 insertion(+), 28 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 105b85a92..d8801b1f5 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -13,20 +13,6 @@ namespace Tegra { -void CommandList::RefreshIntegrityChecks(GPU& gpu) { - command_list_hashes.resize(command_lists.size()); - - for (std::size_t index = 0; index < command_lists.size(); ++index) { - const CommandListHeader command_list_header = command_lists[index]; - std::vector command_headers(command_list_header.size); - gpu.MemoryManager().ReadBlockUnsafe(command_list_header.addr, command_headers.data(), - command_list_header.size * sizeof(u32)); - command_list_hashes[index] = - Common::CityHash64(reinterpret_cast(command_headers.data()), - command_list_header.size * sizeof(u32)); - } -} - DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} DmaPusher::~DmaPusher() = default; @@ -77,8 +63,7 @@ bool DmaPusher::Step() { dma_pushbuffer.pop(); } else { const CommandListHeader command_list_header{ - command_list.command_lists[dma_pushbuffer_subindex]}; - const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++]; + command_list.command_lists[dma_pushbuffer_subindex++]}; const GPUVAddr dma_get = command_list_header.addr; if (dma_pushbuffer_subindex >= command_list.command_lists.size()) { @@ -95,15 +80,6 @@ bool DmaPusher::Step() { command_headers.resize(command_list_header.size); gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), command_list_header.size * sizeof(u32)); - - // Integrity check - const u64 new_hash = Common::CityHash64(reinterpret_cast(command_headers.data()), - command_list_header.size * sizeof(u32)); - if (new_hash != next_hash) { - LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", 
dma_get); - dma_pushbuffer.pop(); - return true; - } } for (std::size_t index = 0; index < command_headers.size();) { const CommandHeader& command_header = command_headers[index]; diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 9d9a750d9..96ac267f7 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -90,10 +90,7 @@ struct CommandList final { explicit CommandList(std::vector&& prefetch_command_list) : prefetch_command_list{std::move(prefetch_command_list)} {} - void RefreshIntegrityChecks(GPU& gpu); - std::vector command_lists; - std::vector command_list_hashes; std::vector prefetch_command_list; }; -- cgit v1.2.3 From 9ea8cffe35630f208024b4f547e9c261703edd06 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 12 Jul 2020 05:03:05 -0300 Subject: maxwell_3d: Move code to separate functions Deduplicate some code and put it in separate functions so it's easier to understand and profile. --- src/video_core/engines/maxwell_3d.cpp | 267 +++++++++++++++------------------- src/video_core/engines/maxwell_3d.h | 8 + 2 files changed, 124 insertions(+), 151 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 57ebc785f..f7fa5fea7 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -124,6 +124,114 @@ void Maxwell3D::InitializeRegisterDefaults() { mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; } +void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) { + if (executing_macro == 0) { + // A macro call must begin by writing the macro method's register, not its argument. 
+ ASSERT_MSG((method % 2) == 0, + "Can't start macro execution by writing to the ARGS register"); + executing_macro = method; + } + + for (std::size_t i = 0; i < amount; i++) { + macro_params.push_back(base_start[i]); + } + + // Call the macro when there are no more parameters in the command buffer + if (is_last_call) { + CallMacroMethod(executing_macro, macro_params); + macro_params.clear(); + } +} + +u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { + // Keep track of the register value in shadow_state when requested. + const auto control = shadow_state.shadow_ram_control; + if (control == Regs::ShadowRamControl::Track || + control == Regs::ShadowRamControl::TrackWithFilter) { + shadow_state.reg_array[method] = argument; + return argument; + } + if (control == Regs::ShadowRamControl::Replay) { + return shadow_state.reg_array[method]; + } + return argument; +} + +void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) { + if (regs.reg_array[method] == argument) { + return; + } + regs.reg_array[method] = argument; + + for (const auto& table : dirty.tables) { + dirty.flags[table[method]] = true; + } +} + +void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, + bool is_last_call) { + switch (method) { + case MAXWELL3D_REG_INDEX(wait_for_idle): + return rasterizer->WaitForIdle(); + case MAXWELL3D_REG_INDEX(shadow_ram_control): + shadow_state.shadow_ram_control = static_cast(nonshadow_argument); + return; + case MAXWELL3D_REG_INDEX(macros.data): + return macro_engine->AddCode(regs.macros.upload_address, argument); + case MAXWELL3D_REG_INDEX(macros.bind): + return ProcessMacroBind(argument); + case MAXWELL3D_REG_INDEX(firmware[4]): + return ProcessFirmwareCall4(); + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]): + case 
MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): + return StartCBData(method); + case MAXWELL3D_REG_INDEX(cb_bind[0]): + return ProcessCBBind(0); + case MAXWELL3D_REG_INDEX(cb_bind[1]): + return ProcessCBBind(1); + case MAXWELL3D_REG_INDEX(cb_bind[2]): + return ProcessCBBind(2); + case MAXWELL3D_REG_INDEX(cb_bind[3]): + return ProcessCBBind(3); + case MAXWELL3D_REG_INDEX(cb_bind[4]): + return ProcessCBBind(4); + case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): + return DrawArrays(); + case MAXWELL3D_REG_INDEX(clear_buffers): + return ProcessClearBuffers(); + case MAXWELL3D_REG_INDEX(query.query_get): + return ProcessQueryGet(); + case MAXWELL3D_REG_INDEX(condition.mode): + return ProcessQueryCondition(); + case MAXWELL3D_REG_INDEX(counter_reset): + return ProcessCounterReset(); + case MAXWELL3D_REG_INDEX(sync_info): + return ProcessSyncPoint(); + case MAXWELL3D_REG_INDEX(exec_upload): + return upload_state.ProcessExec(regs.exec_upload.linear != 0); + case MAXWELL3D_REG_INDEX(data_upload): + upload_state.ProcessData(argument, is_last_call); + if (is_last_call) { + OnMemoryWrite(); + } + return; + } +} + void Maxwell3D::CallMacroMethod(u32 method, const std::vector& parameters) { // Reset the current macro. 
executing_macro = 0; @@ -157,142 +265,16 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { // Methods after 0xE00 are special, they're actually triggers for some microcode that was // uploaded to the GPU during initialization. if (method >= MacroRegistersStart) { - // We're trying to execute a macro - if (executing_macro == 0) { - // A macro call must begin by writing the macro method's register, not its argument. - ASSERT_MSG((method % 2) == 0, - "Can't start macro execution by writing to the ARGS register"); - executing_macro = method; - } - - macro_params.push_back(method_argument); - - // Call the macro when there are no more parameters in the command buffer - if (is_last_call) { - CallMacroMethod(executing_macro, macro_params); - macro_params.clear(); - } + ProcessMacro(method, &method_argument, 1, is_last_call); return; } ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register, increase the size of the Regs structure"); - u32 arg = method_argument; - // Keep track of the register value in shadow_state when requested. 
- if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track || - shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) { - shadow_state.reg_array[method] = arg; - } else if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Replay) { - arg = shadow_state.reg_array[method]; - } - - if (regs.reg_array[method] != arg) { - regs.reg_array[method] = arg; - - for (const auto& table : dirty.tables) { - dirty.flags[table[method]] = true; - } - } - - switch (method) { - case MAXWELL3D_REG_INDEX(wait_for_idle): { - rasterizer->WaitForIdle(); - break; - } - case MAXWELL3D_REG_INDEX(shadow_ram_control): { - shadow_state.shadow_ram_control = static_cast(method_argument); - break; - } - case MAXWELL3D_REG_INDEX(macros.data): { - macro_engine->AddCode(regs.macros.upload_address, arg); - break; - } - case MAXWELL3D_REG_INDEX(macros.bind): { - ProcessMacroBind(arg); - break; - } - case MAXWELL3D_REG_INDEX(firmware[4]): { - ProcessFirmwareCall4(); - break; - } - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { - StartCBData(method); - break; - } - case MAXWELL3D_REG_INDEX(cb_bind[0]): { - ProcessCBBind(0); - break; - } - case 
MAXWELL3D_REG_INDEX(cb_bind[1]): { - ProcessCBBind(1); - break; - } - case MAXWELL3D_REG_INDEX(cb_bind[2]): { - ProcessCBBind(2); - break; - } - case MAXWELL3D_REG_INDEX(cb_bind[3]): { - ProcessCBBind(3); - break; - } - case MAXWELL3D_REG_INDEX(cb_bind[4]): { - ProcessCBBind(4); - break; - } - case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): { - DrawArrays(); - break; - } - case MAXWELL3D_REG_INDEX(clear_buffers): { - ProcessClearBuffers(); - break; - } - case MAXWELL3D_REG_INDEX(query.query_get): { - ProcessQueryGet(); - break; - } - case MAXWELL3D_REG_INDEX(condition.mode): { - ProcessQueryCondition(); - break; - } - case MAXWELL3D_REG_INDEX(counter_reset): { - ProcessCounterReset(); - break; - } - case MAXWELL3D_REG_INDEX(sync_info): { - ProcessSyncPoint(); - break; - } - case MAXWELL3D_REG_INDEX(exec_upload): { - upload_state.ProcessExec(regs.exec_upload.linear != 0); - break; - } - case MAXWELL3D_REG_INDEX(data_upload): { - upload_state.ProcessData(arg, is_last_call); - if (is_last_call) { - OnMemoryWrite(); - } - break; - } - default: - break; - } + const u32 argument = ProcessShadowRam(method, method_argument); + ProcessDirtyRegisters(method, argument); + ProcessMethodCall(method, argument, method_argument, is_last_call); } void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, @@ -300,23 +282,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, // Methods after 0xE00 are special, they're actually triggers for some microcode that was // uploaded to the GPU during initialization. if (method >= MacroRegistersStart) { - // We're trying to execute a macro - if (executing_macro == 0) { - // A macro call must begin by writing the macro method's register, not its argument. 
- ASSERT_MSG((method % 2) == 0, - "Can't start macro execution by writing to the ARGS register"); - executing_macro = method; - } - - for (std::size_t i = 0; i < amount; i++) { - macro_params.push_back(base_start[i]); - } - - // Call the macro when there are no more parameters in the command buffer - if (amount == methods_pending) { - CallMacroMethod(executing_macro, macro_params); - macro_params.clear(); - } + ProcessMacro(method, base_start, amount, amount == methods_pending); return; } switch (method) { @@ -335,15 +301,14 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): ProcessCBMultiData(method, base_start, amount); break; - } - default: { + default: for (std::size_t i = 0; i < amount; i++) { CallMethod(method, base_start[i], methods_pending - static_cast(i) <= 1); } - } + break; } } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index bc289c55d..1cbe8fe67 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1461,6 +1461,14 @@ public: private: void InitializeRegisterDefaults(); + void ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call); + + u32 ProcessShadowRam(u32 method, u32 argument); + + void ProcessDirtyRegisters(u32 method, u32 argument); + + void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call); + Core::System& system; MemoryManager& memory_manager; -- cgit v1.2.3 From 622830f4e16a8f0eabeb1b81dea5fca613402d8c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 12 Jul 2020 05:05:04 -0300 Subject: maxwell_3d: Use insert instead of loop push_back This reduces the overhead of bounds checking on each element. 
It won't reduce the cost of allocation because usually this vector's capacity is usually large enough to hold whatever we push to it. --- src/video_core/engines/maxwell_3d.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index f7fa5fea7..6287df633 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -132,9 +132,7 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool executing_macro = method; } - for (std::size_t i = 0; i < amount; i++) { - macro_params.push_back(base_start[i]); - } + macro_params.insert(macro_params.end(), base_start, base_start + amount); // Call the macro when there are no more parameters in the command buffer if (is_last_call) { -- cgit v1.2.3 From b928fca114d732971541e2bcbd7e9f40aa5497f2 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 17 Nov 2020 07:14:44 -0500 Subject: gpu: Make use of [[nodiscard]] where applicable --- src/video_core/gpu.h | 66 ++++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 31 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index cf5235a79..21410e125 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -17,11 +17,11 @@ #include "video_core/dma_pusher.h" using CacheAddr = std::uintptr_t; -inline CacheAddr ToCacheAddr(const void* host_ptr) { +[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { return reinterpret_cast(host_ptr); } -inline u8* FromCacheAddr(CacheAddr cache_addr) { +[[nodiscard]] inline u8* FromCacheAddr(CacheAddr cache_addr) { return reinterpret_cast(cache_addr); } @@ -149,13 +149,13 @@ public: u32 subchannel{}; u32 method_count{}; - bool IsLastCall() const { - return method_count <= 1; - } - MethodCall(u32 method, u32 argument, u32 subchannel = 0, u32 method_count = 0) : method(method), 
argument(argument), subchannel(subchannel), method_count(method_count) {} + + [[nodiscard]] bool IsLastCall() const { + return method_count <= 1; + } }; explicit GPU(Core::System& system, bool is_async, bool use_nvdec); @@ -179,10 +179,10 @@ public: virtual void OnCommandListEnd(); /// Request a host GPU memory flush from the CPU. - u64 RequestFlush(VAddr addr, std::size_t size); + [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); /// Obtains current flush request fence id. - u64 CurrentFlushRequestFence() const { + [[nodiscard]] u64 CurrentFlushRequestFence() const { return current_flush_fence.load(std::memory_order_relaxed); } @@ -190,48 +190,52 @@ public: void TickWork(); /// Returns a reference to the Maxwell3D GPU engine. - Engines::Maxwell3D& Maxwell3D(); + [[nodiscard]] Engines::Maxwell3D& Maxwell3D(); /// Returns a const reference to the Maxwell3D GPU engine. - const Engines::Maxwell3D& Maxwell3D() const; + [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const; /// Returns a reference to the KeplerCompute GPU engine. - Engines::KeplerCompute& KeplerCompute(); + [[nodiscard]] Engines::KeplerCompute& KeplerCompute(); /// Returns a reference to the KeplerCompute GPU engine. - const Engines::KeplerCompute& KeplerCompute() const; + [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const; /// Returns a reference to the GPU memory manager. - Tegra::MemoryManager& MemoryManager(); + [[nodiscard]] Tegra::MemoryManager& MemoryManager(); /// Returns a const reference to the GPU memory manager. - const Tegra::MemoryManager& MemoryManager() const; + [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const; /// Returns a reference to the GPU DMA pusher. - Tegra::DmaPusher& DmaPusher(); + [[nodiscard]] Tegra::DmaPusher& DmaPusher(); /// Returns a const reference to the GPU DMA pusher. - const Tegra::DmaPusher& DmaPusher() const; + [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const; /// Returns a reference to the GPU CDMA pusher. 
- Tegra::CDmaPusher& CDmaPusher(); + [[nodiscard]] Tegra::CDmaPusher& CDmaPusher(); /// Returns a const reference to the GPU CDMA pusher. - const Tegra::CDmaPusher& CDmaPusher() const; + [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const; - VideoCore::RendererBase& Renderer() { + /// Returns a reference to the underlying renderer. + [[nodiscard]] VideoCore::RendererBase& Renderer() { return *renderer; } - const VideoCore::RendererBase& Renderer() const { + /// Returns a const reference to the underlying renderer. + [[nodiscard]] const VideoCore::RendererBase& Renderer() const { return *renderer; } - VideoCore::ShaderNotify& ShaderNotify() { + /// Returns a reference to the shader notifier. + [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() { return *shader_notify; } - const VideoCore::ShaderNotify& ShaderNotify() const { + /// Returns a const reference to the shader notifier. + [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const { return *shader_notify; } @@ -243,23 +247,23 @@ public: void IncrementSyncPoint(u32 syncpoint_id); - u32 GetSyncpointValue(u32 syncpoint_id) const; + [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const; void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value); - bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); + [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); - u64 GetTicks() const; + [[nodiscard]] u64 GetTicks() const; - std::unique_lock LockSync() { + [[nodiscard]] std::unique_lock LockSync() { return std::unique_lock{sync_mutex}; } - bool IsAsync() const { + [[nodiscard]] bool IsAsync() const { return is_async; } - bool UseNvdec() const { + [[nodiscard]] bool UseNvdec() const { return use_nvdec; } @@ -273,7 +277,7 @@ public: BitField<0, 1, FenceOperation> op; BitField<8, 24, u32> syncpoint_id; - static CommandHeader Build(FenceOperation op, u32 syncpoint_id) { + [[nodiscard]] static CommandHeader Build(FenceOperation op, u32 syncpoint_id) { FenceAction result{}; 
result.op.Assign(op); result.syncpoint_id.Assign(syncpoint_id); @@ -291,7 +295,7 @@ public: u32 address_high; u32 address_low; - GPUVAddr SemaphoreAddress() const { + [[nodiscard]] GPUVAddr SemaphoreAddress() const { return static_cast((static_cast(address_high) << 32) | address_low); } @@ -374,7 +378,7 @@ private: u32 methods_pending); /// Determines where the method should be executed. - bool ExecuteMethodOnEngine(u32 method); + [[nodiscard]] bool ExecuteMethodOnEngine(u32 method); protected: Core::System& system; -- cgit v1.2.3 From a78021580d9d37dfb1d1c72256e9e508ebc24b0e Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 17 Nov 2020 07:16:00 -0500 Subject: render_base: Make use of [[nodiscard]] where applicable --- src/video_core/renderer_base.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 5c650808b..51dde8eb5 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -38,7 +38,7 @@ public: virtual ~RendererBase(); /// Initialize the renderer - virtual bool Init() = 0; + [[nodiscard]] virtual bool Init() = 0; /// Shutdown the renderer virtual void ShutDown() = 0; @@ -49,43 +49,43 @@ public: // Getter/setter functions: // ------------------------ - f32 GetCurrentFPS() const { + [[nodiscard]] f32 GetCurrentFPS() const { return m_current_fps; } - int GetCurrentFrame() const { + [[nodiscard]] int GetCurrentFrame() const { return m_current_frame; } - RasterizerInterface& Rasterizer() { + [[nodiscard]] RasterizerInterface& Rasterizer() { return *rasterizer; } - const RasterizerInterface& Rasterizer() const { + [[nodiscard]] const RasterizerInterface& Rasterizer() const { return *rasterizer; } - Core::Frontend::GraphicsContext& Context() { + [[nodiscard]] Core::Frontend::GraphicsContext& Context() { return *context; } - const Core::Frontend::GraphicsContext& Context() const { + [[nodiscard]] const 
Core::Frontend::GraphicsContext& Context() const { return *context; } - Core::Frontend::EmuWindow& GetRenderWindow() { + [[nodiscard]] Core::Frontend::EmuWindow& GetRenderWindow() { return render_window; } - const Core::Frontend::EmuWindow& GetRenderWindow() const { + [[nodiscard]] const Core::Frontend::EmuWindow& GetRenderWindow() const { return render_window; } - RendererSettings& Settings() { + [[nodiscard]] RendererSettings& Settings() { return renderer_settings; } - const RendererSettings& Settings() const { + [[nodiscard]] const RendererSettings& Settings() const { return renderer_settings; } -- cgit v1.2.3 From 70812ec57b37263ebcf12719fd650dddf4d45b23 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 17 Nov 2020 07:17:43 -0500 Subject: rasterizer_interface: Make use of [[nodiscard]] where applicable --- src/video_core/rasterizer_interface.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index b3e0919f8..27ef4c69a 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -32,7 +32,7 @@ using DiskResourceLoadCallback = std::function index; u64 value; @@ -658,7 +658,7 @@ union Instruction { return *this; } - constexpr Instruction(u64 value) : value{value} {} + constexpr Instruction(u64 value_) : value{value_} {} constexpr Instruction(const Instruction& instr) : value(instr.value) {} constexpr bool Bit(u64 offset) const { @@ -1624,12 +1624,13 @@ union Instruction { s32 GetBranchTarget() const { // Sign extend the branch target offset - u32 mask = 1U << (24 - 1); - u32 value = static_cast(target); + const auto mask = 1U << (24 - 1); + const auto target_value = static_cast(target); + constexpr auto instruction_size = static_cast(sizeof(Instruction)); + // The branch offset is relative to the next instruction and is stored in bytes, so // divide it by the size of an instruction and 
add 1 to it. - return static_cast((value ^ mask) - mask) / static_cast(sizeof(Instruction)) + - 1; + return static_cast((target_value ^ mask) - mask) / instruction_size + 1; } } bra; @@ -1639,12 +1640,13 @@ union Instruction { s32 GetBranchExtend() const { // Sign extend the branch target offset - u32 mask = 1U << (24 - 1); - u32 value = static_cast(target); + const auto mask = 1U << (24 - 1); + const auto target_value = static_cast(target); + constexpr auto instruction_size = static_cast(sizeof(Instruction)); + // The branch offset is relative to the next instruction and is stored in bytes, so // divide it by the size of an instruction and add 1 to it. - return static_cast((value ^ mask) - mask) / static_cast(sizeof(Instruction)) + - 1; + return static_cast((target_value ^ mask) - mask) / instruction_size + 1; } } brx; @@ -2004,8 +2006,8 @@ public: class Matcher { public: - constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type) - : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {} + constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_) + : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {} constexpr const char* GetName() const { return name; -- cgit v1.2.3 From b7cd5d742e125d17f5530bcb38fac40d1f41ea4d Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 20 Nov 2020 02:20:34 -0500 Subject: shader_bytecode: Make use of [[nodiscard]] where applicable Ensures that all queried values are made use of. 
--- src/video_core/engines/shader_bytecode.h | 152 ++++++++++++++++--------------- 1 file changed, 79 insertions(+), 73 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 1640207a7..37d17efdc 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -34,29 +34,29 @@ struct Register { constexpr Register(u64 value_) : value(value_) {} - constexpr operator u64() const { + [[nodiscard]] constexpr operator u64() const { return value; } template - constexpr u64 operator-(const T& oth) const { + [[nodiscard]] constexpr u64 operator-(const T& oth) const { return value - oth; } template - constexpr u64 operator&(const T& oth) const { + [[nodiscard]] constexpr u64 operator&(const T& oth) const { return value & oth; } - constexpr u64 operator&(const Register& oth) const { + [[nodiscard]] constexpr u64 operator&(const Register& oth) const { return value & oth.value; } - constexpr u64 operator~() const { + [[nodiscard]] constexpr u64 operator~() const { return ~value; } - u64 GetSwizzledIndex(u64 elem) const { + [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const { elem = (value + elem) & 3; return (value & ~3) + elem; } @@ -107,7 +107,7 @@ union Attribute { BitField<31, 1, u64> patch; BitField<47, 3, AttributeSize> size; - bool IsPhysical() const { + [[nodiscard]] bool IsPhysical() const { return patch == 0 && element == 0 && static_cast(index.Value()) == 0; } } fmt20; @@ -505,14 +505,14 @@ struct IpaMode { IpaInterpMode interpolation_mode; IpaSampleMode sampling_mode; - bool operator==(const IpaMode& a) const { + [[nodiscard]] bool operator==(const IpaMode& a) const { return std::tie(interpolation_mode, sampling_mode) == std::tie(a.interpolation_mode, a.sampling_mode); } - bool operator!=(const IpaMode& a) const { + [[nodiscard]] bool operator!=(const IpaMode& a) const { return !operator==(a); } - bool operator<(const IpaMode& a) const { 
+ [[nodiscard]] bool operator<(const IpaMode& a) const { return std::tie(interpolation_mode, sampling_mode) < std::tie(a.interpolation_mode, a.sampling_mode); } @@ -661,7 +661,7 @@ union Instruction { constexpr Instruction(u64 value_) : value{value_} {} constexpr Instruction(const Instruction& instr) : value(instr.value) {} - constexpr bool Bit(u64 offset) const { + [[nodiscard]] constexpr bool Bit(u64 offset) const { return ((value >> offset) & 1) != 0; } @@ -746,34 +746,34 @@ union Instruction { BitField<28, 8, u64> imm_lut28; BitField<48, 8, u64> imm_lut48; - u32 GetImmLut28() const { + [[nodiscard]] u32 GetImmLut28() const { return static_cast(imm_lut28); } - u32 GetImmLut48() const { + [[nodiscard]] u32 GetImmLut48() const { return static_cast(imm_lut48); } } lop3; - u16 GetImm20_16() const { + [[nodiscard]] u16 GetImm20_16() const { return static_cast(imm20_16); } - u32 GetImm20_19() const { + [[nodiscard]] u32 GetImm20_19() const { u32 imm{static_cast(imm20_19)}; imm <<= 12; imm |= negate_imm ? 0x80000000 : 0; return imm; } - u32 GetImm20_32() const { + [[nodiscard]] u32 GetImm20_32() const { return static_cast(imm20_32); } - s32 GetSignedImm20_20() const { - u32 immediate = static_cast(imm20_19 | (negate_imm << 19)); + [[nodiscard]] s32 GetSignedImm20_20() const { + const auto immediate = static_cast(imm20_19 | (negate_imm << 19)); // Sign extend the 20-bit value. - u32 mask = 1U << (20 - 1); + const auto mask = 1U << (20 - 1); return static_cast((immediate ^ mask) - mask); } } alu; @@ -857,7 +857,7 @@ union Instruction { BitField<56, 1, u64> second_negate; BitField<30, 9, u64> second; - u32 PackImmediates() const { + [[nodiscard]] u32 PackImmediates() const { // Immediates are half floats shifted. 
constexpr u32 imm_shift = 6; return static_cast((first << imm_shift) | (second << (16 + imm_shift))); @@ -1033,7 +1033,7 @@ union Instruction { BitField<28, 2, AtomicType> type; BitField<30, 22, s64> offset; - s32 GetImmediateOffset() const { + [[nodiscard]] s32 GetImmediateOffset() const { return static_cast(offset << 2); } } atoms; @@ -1215,7 +1215,7 @@ union Instruction { BitField<39, 4, u64> rounding; // H0, H1 extract for F16 missing BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value - F2fRoundingOp GetRoundingMode() const { + [[nodiscard]] F2fRoundingOp GetRoundingMode() const { constexpr u64 rounding_mask = 0x0B; return static_cast(rounding.Value() & rounding_mask); } @@ -1239,15 +1239,15 @@ union Instruction { BitField<54, 1, u64> aoffi_flag; BitField<55, 3, TextureProcessMode> process_mode; - bool IsComponentEnabled(std::size_t component) const { - return ((1ull << component) & component_mask) != 0; + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { + return ((1ULL << component) & component_mask) != 0; } - TextureProcessMode GetTextureProcessMode() const { + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { return process_mode; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::DC: return dc_flag != 0; @@ -1271,15 +1271,15 @@ union Instruction { BitField<36, 1, u64> aoffi_flag; BitField<37, 3, TextureProcessMode> process_mode; - bool IsComponentEnabled(std::size_t component) const { + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { return ((1ULL << component) & component_mask) != 0; } - TextureProcessMode GetTextureProcessMode() const { + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { return process_mode; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case 
TextureMiscMode::DC: return dc_flag != 0; @@ -1299,7 +1299,7 @@ union Instruction { BitField<31, 4, u64> component_mask; BitField<49, 1, u64> nodep_flag; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::NODEP: return nodep_flag != 0; @@ -1309,7 +1309,7 @@ union Instruction { return false; } - bool IsComponentEnabled(std::size_t component) const { + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { return ((1ULL << component) & component_mask) != 0; } } txq; @@ -1321,11 +1321,11 @@ union Instruction { BitField<35, 1, u64> ndv_flag; BitField<49, 1, u64> nodep_flag; - bool IsComponentEnabled(std::size_t component) const { - return ((1ull << component) & component_mask) != 0; + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { + return ((1ULL << component) & component_mask) != 0; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::NDV: return (ndv_flag != 0); @@ -1347,7 +1347,7 @@ union Instruction { BitField<54, 2, u64> offset_mode; BitField<56, 2, u64> component; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::NDV: return ndv_flag != 0; @@ -1373,7 +1373,7 @@ union Instruction { BitField<33, 2, u64> offset_mode; BitField<37, 2, u64> component; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::NDV: return ndv_flag != 0; @@ -1399,7 +1399,7 @@ union Instruction { BitField<52, 2, u64> component; BitField<55, 1, u64> fp16_flag; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::DC: return dc_flag != 0; @@ -1422,16 +1422,20 @@ union 
Instruction { BitField<53, 4, u64> texture_info; BitField<59, 1, u64> fp32_flag; - TextureType GetTextureType() const { + [[nodiscard]] TextureType GetTextureType() const { // The TEXS instruction has a weird encoding for the texture type. - if (texture_info == 0) + if (texture_info == 0) { return TextureType::Texture1D; - if (texture_info >= 1 && texture_info <= 9) + } + if (texture_info >= 1 && texture_info <= 9) { return TextureType::Texture2D; - if (texture_info >= 10 && texture_info <= 11) + } + if (texture_info >= 10 && texture_info <= 11) { return TextureType::Texture3D; - if (texture_info >= 12 && texture_info <= 13) + } + if (texture_info >= 12 && texture_info <= 13) { return TextureType::TextureCube; + } LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", static_cast(texture_info.Value())); @@ -1439,7 +1443,7 @@ union Instruction { return TextureType::Texture1D; } - TextureProcessMode GetTextureProcessMode() const { + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { switch (texture_info) { case 0: case 2: @@ -1458,7 +1462,7 @@ union Instruction { return TextureProcessMode::None; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::DC: return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; @@ -1470,16 +1474,16 @@ union Instruction { return false; } - bool IsArrayTexture() const { + [[nodiscard]] bool IsArrayTexture() const { // TEXS only supports Texture2D arrays. 
return texture_info >= 7 && texture_info <= 9; } - bool HasTwoDestinations() const { + [[nodiscard]] bool HasTwoDestinations() const { return gpr28.Value() != Register::ZeroIndex; } - bool IsComponentEnabled(std::size_t component) const { + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { static constexpr std::array, 4> mask_lut{{ {}, {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, @@ -1506,7 +1510,7 @@ union Instruction { BitField<54, 1, u64> cl; BitField<55, 1, u64> process_mode; - TextureProcessMode GetTextureProcessMode() const { + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL; } } tld; @@ -1516,7 +1520,7 @@ union Instruction { BitField<53, 4, u64> texture_info; BitField<59, 1, u64> fp32_flag; - TextureType GetTextureType() const { + [[nodiscard]] TextureType GetTextureType() const { // The TLDS instruction has a weird encoding for the texture type. if (texture_info <= 1) { return TextureType::Texture1D; @@ -1535,13 +1539,14 @@ union Instruction { return TextureType::Texture1D; } - TextureProcessMode GetTextureProcessMode() const { - if (texture_info == 1 || texture_info == 5 || texture_info == 12) + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { + if (texture_info == 1 || texture_info == 5 || texture_info == 12) { return TextureProcessMode::LL; + } return TextureProcessMode::LZ; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::AOFFI: return texture_info == 12 || texture_info == 4; @@ -1555,7 +1560,7 @@ union Instruction { return false; } - bool IsArrayTexture() const { + [[nodiscard]] bool IsArrayTexture() const { // TEXS only supports Texture2D arrays. 
return texture_info == 8; } @@ -1567,7 +1572,7 @@ union Instruction { BitField<35, 1, u64> aoffi_flag; BitField<49, 1, u64> nodep_flag; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::AOFFI: return aoffi_flag != 0; @@ -1591,7 +1596,7 @@ union Instruction { BitField<20, 3, StoreType> store_data_layout; BitField<20, 4, u64> component_mask_selector; - bool IsComponentEnabled(std::size_t component) const { + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { ASSERT(mode == SurfaceDataMode::P); constexpr u8 R = 0b0001; constexpr u8 G = 0b0010; @@ -1604,7 +1609,7 @@ union Instruction { return std::bitset<4>{mask.at(component_mask_selector)}.test(component); } - StoreType GetStoreDataLayout() const { + [[nodiscard]] StoreType GetStoreDataLayout() const { ASSERT(mode == SurfaceDataMode::D_BA); return store_data_layout; } @@ -1622,7 +1627,7 @@ union Instruction { BitField<20, 24, u64> target; BitField<5, 1, u64> constant_buffer; - s32 GetBranchTarget() const { + [[nodiscard]] s32 GetBranchTarget() const { // Sign extend the branch target offset const auto mask = 1U << (24 - 1); const auto target_value = static_cast(target); @@ -1638,7 +1643,7 @@ union Instruction { BitField<20, 24, u64> target; BitField<5, 1, u64> constant_buffer; - s32 GetBranchExtend() const { + [[nodiscard]] s32 GetBranchExtend() const { // Sign extend the branch target offset const auto mask = 1U << (24 - 1); const auto target_value = static_cast(target); @@ -1699,7 +1704,7 @@ union Instruction { BitField<50, 1, u64> is_op_b_register; BitField<51, 3, VmnmxOperation> operation; - VmnmxType SourceFormatA() const { + [[nodiscard]] VmnmxType SourceFormatA() const { switch (src_format_a) { case 0b11: return VmnmxType::Bits32; @@ -1710,7 +1715,7 @@ union Instruction { } } - VmnmxType SourceFormatB() const { + [[nodiscard]] VmnmxType SourceFormatB() const { switch (src_format_b) { case 
0b11: return VmnmxType::Bits32; @@ -1741,7 +1746,7 @@ union Instruction { BitField<20, 14, u64> shifted_offset; BitField<34, 5, u64> index; - u64 GetOffset() const { + [[nodiscard]] u64 GetOffset() const { return shifted_offset * 4; } } cbuf34; @@ -1750,7 +1755,7 @@ union Instruction { BitField<20, 16, s64> offset; BitField<36, 5, u64> index; - s64 GetOffset() const { + [[nodiscard]] s64 GetOffset() const { return offset; } } cbuf36; @@ -1999,7 +2004,7 @@ public: /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be /// conditionally executed). - static bool IsPredicatedInstruction(Id opcode) { + [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) { // TODO(Subv): Add the rest of unpredicated instructions. return opcode != Id::SSY && opcode != Id::PBK; } @@ -2009,19 +2014,19 @@ public: constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_) : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {} - constexpr const char* GetName() const { + [[nodiscard]] constexpr const char* GetName() const { return name; } - constexpr u16 GetMask() const { + [[nodiscard]] constexpr u16 GetMask() const { return mask; } - constexpr Id GetId() const { + [[nodiscard]] constexpr Id GetId() const { return id; } - constexpr Type GetType() const { + [[nodiscard]] constexpr Type GetType() const { return type; } @@ -2030,7 +2035,7 @@ public: * @param instruction The instruction to test * @returns true if the given instruction matches. 
*/ - constexpr bool Matches(u16 instruction) const { + [[nodiscard]] constexpr bool Matches(u16 instruction) const { return (instruction & mask) == expected; } @@ -2042,7 +2047,8 @@ public: Type type; }; - static std::optional> Decode(Instruction instr) { + using DecodeResult = std::optional>; + [[nodiscard]] static DecodeResult Decode(Instruction instr) { static const auto table{GetDecodeTable()}; const auto matches_instruction = [instr](const auto& matcher) { @@ -2064,7 +2070,7 @@ private: * A '0' in a bitstring indicates that a zero must be present at that bit position. * A '1' in a bitstring indicates that a one must be present at that bit position. */ - static constexpr auto GetMaskAndExpect(const char* const bitstring) { + [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) { u16 mask = 0, expect = 0; for (std::size_t i = 0; i < opcode_bitsize; i++) { const std::size_t bit_position = opcode_bitsize - i - 1; @@ -2086,14 +2092,14 @@ private: public: /// Creates a matcher that can match and parse instructions based on bitstring. - static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type, - const char* const name) { + [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op, + Type type, const char* const name) { const auto [mask, expected] = GetMaskAndExpect(bitstring); return Matcher(name, mask, expected, op, type); } }; - static std::vector GetDecodeTable() { + [[nodiscard]] static std::vector GetDecodeTable() { std::vector table = { #define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name) INST("111000110011----", Id::KIL, Type::Flow, "KIL"), -- cgit v1.2.3 From 8469b766303a21790863447375416e66250f42bc Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 20 Nov 2020 04:24:30 -0500 Subject: gl_rasterizer: Make floating-point literal a float Gets rid of an unnecessary expansion from float to double. 
--- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 36bf92808..3d0777000 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1156,7 +1156,7 @@ void RasterizerOpenGL::SyncViewport() { flags[Dirty::ClipControl] = false; bool flip_y = false; - if (regs.viewport_transform[0].scale_y < 0.0) { + if (regs.viewport_transform[0].scale_y < 0.0f) { flip_y = !flip_y; } if (regs.screen_y_control.y_negate != 0) { -- cgit v1.2.3 From 5b441fa25d7003d7fc85584d2e5c30e9d54e69f6 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 20 Nov 2020 04:34:02 -0500 Subject: async_shaders: std::move data within QueueVulkanShader() Same behavior, but avoids redundant copies. While we're at it, we can simplify the pushing of the parameters into the pending queue. 
--- src/video_core/shader/async_shaders.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index 39cc3b869..c106b2a20 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -153,8 +153,8 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, .descriptor_pool = &descriptor_pool, .update_descriptor_queue = &update_descriptor_queue, .renderpass_cache = &renderpass_cache, - .bindings = bindings, - .program = program, + .bindings = std::move(bindings), + .program = std::move(program), .key = key, }; -- cgit v1.2.3 From 3fcc98e11adc1cafc4644483a81b29e55e90d11a Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 20 Nov 2020 04:41:27 -0500 Subject: async_shaders: Simplify moving data into the pending queue --- src/video_core/shader/async_shaders.cpp | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index c106b2a20..c6bd75b7c 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -116,11 +116,10 @@ std::vector AsyncShaders::GetCompletedWork() { void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, u64 uid, std::vector code, std::vector code_b, - u32 main_offset, - VideoCommon::Shader::CompilerSettings compiler_settings, - const VideoCommon::Shader::Registry& registry, - VAddr cpu_addr) { - WorkerParams params{ + u32 main_offset, CompilerSettings compiler_settings, + const Registry& registry, VAddr cpu_addr) { + std::unique_lock lock(queue_mutex); + pending_queue.push({ .backend = device.UseAssemblyShaders() ? 
Backend::GLASM : Backend::OpenGL, .device = &device, .shader_type = shader_type, @@ -131,9 +130,7 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, .compiler_settings = compiler_settings, .registry = registry, .cpu_address = cpu_addr, - }; - std::unique_lock lock(queue_mutex); - pending_queue.push(std::move(params)); + }); cv.notify_one(); } @@ -145,7 +142,8 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, std::vector bindings, Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key) { - WorkerParams params{ + std::unique_lock lock(queue_mutex); + pending_queue.push({ .backend = Backend::Vulkan, .pp_cache = pp_cache, .vk_device = &device, @@ -156,10 +154,7 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, .bindings = std::move(bindings), .program = std::move(program), .key = key, - }; - - std::unique_lock lock(queue_mutex); - pending_queue.push(std::move(params)); + }); cv.notify_one(); } -- cgit v1.2.3 From ba3916fc67bac5f9cb40ebc91fccca065e877174 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 20 Nov 2020 04:44:42 -0500 Subject: async_shaders: Simplify implementation of GetCompletedWork() This is equivalent to moving all the contents and then clearing the vector. This avoids a redundant allocation. 
--- src/video_core/shader/async_shaders.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index c6bd75b7c..85cda31c0 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -106,8 +106,7 @@ std::vector AsyncShaders::GetCompletedWork() { std::vector results; { std::unique_lock lock{completed_mutex}; - results.assign(std::make_move_iterator(finished_work.begin()), - std::make_move_iterator(finished_work.end())); + results = std::move(finished_work); finished_work.clear(); } return results; -- cgit v1.2.3 From 01db5cf20313125e2a88a6df1bb1696c0f08f346 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 20 Nov 2020 04:46:53 -0500 Subject: async_shaders: emplace threads into the worker thread vector Same behavior, but constructs the threads in place instead of moving them. --- src/video_core/shader/async_shaders.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index 85cda31c0..6920afdf2 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -43,8 +43,8 @@ void AsyncShaders::AllocateWorkers() { // Create workers for (std::size_t i = 0; i < num_workers; i++) { context_list.push_back(emu_window.CreateSharedContext()); - worker_threads.push_back( - std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get())); + worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this, + context_list[i].get()); } } -- cgit v1.2.3 From acc14d233fdd69c0dfcce89660e27cf477083189 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 20 Nov 2020 23:17:40 -0300 Subject: gl_rasterizer: Remove warning of untested alpha test Alpha test has been proven to only affect the first render target. 
--- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 36bf92808..cdcde7c59 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1579,10 +1579,6 @@ void RasterizerOpenGL::SyncAlphaTest() { flags[Dirty::AlphaTest] = false; const auto& regs = maxwell3d.regs; - if (regs.alpha_test_enabled && regs.rt_control.count > 1) { - LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested"); - } - if (regs.alpha_test_enabled) { glEnable(GL_ALPHA_TEST); glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref); -- cgit v1.2.3 From 994f4977810749c0b597e7a7531a02d907967a68 Mon Sep 17 00:00:00 2001 From: comex Date: Sun, 22 Nov 2020 16:05:18 -0500 Subject: Overhaul EmuWindow::PollEvents to fix yuzu-cmd calling SDL_PollEvents off main thread EmuWindow::PollEvents was called from the GPU thread (or the CPU thread in sync-GPU mode) when swapping buffers. It had three implementations: - In GRenderWindow, it didn't actually poll events, just set a flag and emit a signal to indicate that a frame was displayed. - In EmuWindow_SDL2_Hide, it did nothing. - In EmuWindow_SDL2, it did call SDL_PollEvents, but this is wrong because SDL_PollEvents is supposed to be called on the thread that set up video - in this case, the main thread, which was sleeping in a busyloop (regardless of whether sync-GPU was enabled). On macOS this causes a crash. To fix this: - Rename EmuWindow::PollEvents to OnFrameDisplayed, and give it a default implementation that does nothing. - In EmuWindow_SDL2, do not override OnFrameDisplayed, but instead have the main thread call SDL_WaitEvent in a loop. 
--- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 +- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 2ccca1993..c869bb0e2 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -151,8 +151,8 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { rasterizer->TickFrame(); - render_window.PollEvents(); context->SwapBuffers(); + render_window.OnFrameDisplayed(); } void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index f2610868e..a2173edd2 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -252,8 +252,6 @@ RendererVulkan::~RendererVulkan() { } void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - render_window.PollEvents(); - if (!framebuffer) { return; } @@ -283,7 +281,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { rasterizer->TickFrame(); } - render_window.PollEvents(); + render_window.OnFrameDisplayed(); } bool RendererVulkan::Init() { -- cgit v1.2.3 From e8b2fd21d861997e558180d775b14afdc46f3bbd Mon Sep 17 00:00:00 2001 From: comex Date: Sun, 22 Nov 2020 15:48:23 -0500 Subject: nvdrv, video_core: Don't index out of bounds when given invalid syncpoint ID - Use .at() instead of raw indexing when dealing with untrusted indices. - For the special case of WaitFence with syncpoint id UINT32_MAX, instead of crashing, log an error and ignore. This is what I get when running Super Mario Maker 2. 
--- src/video_core/gpu.cpp | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index ebd149c3a..e91f52938 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -95,22 +95,29 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) { if (!is_async) { return; } + if (syncpoint_id == UINT32_MAX) { + // TODO: Research what this does. + LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented"); + return; + } MICROPROFILE_SCOPE(GPU_wait); std::unique_lock lock{sync_mutex}; - sync_cv.wait(lock, [=, this] { return syncpoints[syncpoint_id].load() >= value; }); + sync_cv.wait(lock, [=, this] { return syncpoints.at(syncpoint_id).load() >= value; }); } void GPU::IncrementSyncPoint(const u32 syncpoint_id) { - syncpoints[syncpoint_id]++; + auto& syncpoint = syncpoints.at(syncpoint_id); + syncpoint++; std::lock_guard lock{sync_mutex}; sync_cv.notify_all(); - if (!syncpt_interrupts[syncpoint_id].empty()) { - u32 value = syncpoints[syncpoint_id].load(); - auto it = syncpt_interrupts[syncpoint_id].begin(); - while (it != syncpt_interrupts[syncpoint_id].end()) { + auto& interrupt = syncpt_interrupts.at(syncpoint_id); + if (!interrupt.empty()) { + u32 value = syncpoint.load(); + auto it = interrupt.begin(); + while (it != interrupt.end()) { if (value >= *it) { TriggerCpuInterrupt(syncpoint_id, *it); - it = syncpt_interrupts[syncpoint_id].erase(it); + it = interrupt.erase(it); continue; } it++; @@ -119,22 +126,22 @@ void GPU::IncrementSyncPoint(const u32 syncpoint_id) { } u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { - return syncpoints[syncpoint_id].load(); + return syncpoints.at(syncpoint_id).load(); } void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { - auto& interrupt = syncpt_interrupts[syncpoint_id]; + auto& interrupt = syncpt_interrupts.at(syncpoint_id); bool contains = std::any_of(interrupt.begin(), 
interrupt.end(), [value](u32 in_value) { return in_value == value; }); if (contains) { return; } - syncpt_interrupts[syncpoint_id].emplace_back(value); + interrupt.emplace_back(value); } bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { std::lock_guard lock{sync_mutex}; - auto& interrupt = syncpt_interrupts[syncpoint_id]; + auto& interrupt = syncpt_interrupts.at(syncpoint_id); const auto iter = std::find_if(interrupt.begin(), interrupt.end(), [value](u32 interrupt_value) { return value == interrupt_value; }); -- cgit v1.2.3 From 9014861858295489cf597322801b37dad9aaf2ce Mon Sep 17 00:00:00 2001 From: ameerj Date: Wed, 18 Nov 2020 20:08:51 -0500 Subject: vulkan_renderer: Alpha Test Culling Implementation Used by various textures in many titles, e.g. SSBU menu. --- .../renderer_vulkan/fixed_pipeline_state.cpp | 5 ++ .../renderer_vulkan/fixed_pipeline_state.h | 8 ++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 8 ++++ .../renderer_vulkan/vk_shader_decompiler.cpp | 54 +++++++++++++++++++++- .../renderer_vulkan/vk_shader_decompiler.h | 3 ++ 5 files changed, 76 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index da5c550ea..1b9611c59 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -60,6 +60,11 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta rasterize_enable.Assign(regs.rasterize_enable != 0 ? 
1 : 0); topology.Assign(regs.draw.topology); + alpha_raw = 0; + alpha_test_enabled.Assign(regs.alpha_test_enabled); + alpha_test_func.Assign(PackComparisonOp(regs.alpha_test_func)); + std::memcpy(&alpha_test_ref, &regs.alpha_test_ref, sizeof(u32)); // TODO: C++20 std::bit_cast + std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 2c18eeaae..9a45ec6b7 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -187,6 +187,14 @@ struct FixedPipelineState { BitField<23, 1, u32> rasterize_enable; BitField<24, 4, Maxwell::PrimitiveTopology> topology; }; + + u32 alpha_test_ref; /// < Alpha test reference + union { + u32 alpha_raw; + BitField<0, 3, u32> alpha_test_func; + BitField<3, 1, u32> alpha_test_enabled; + }; + u32 point_size; std::array binding_divisors; std::array attributes; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index dedc9c466..9ccf5d011 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -344,6 +344,14 @@ VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { } specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; + // Alpha test + if (fixed_state.alpha_test_enabled == 1) { + specialization.alpha_test_enabled = true; + specialization.alpha_test_func = static_cast(fixed_state.alpha_test_func); + // memcpy from u32 to float TODO: C++20 std::bit_cast + std::memcpy(&specialization.alpha_test_ref, &fixed_state.alpha_test_ref, sizeof(float)); + } + SPIRVProgram program; std::vector bindings; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a20452b87..356d2ab7a 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -2075,6 +2075,55 @@ private: return {}; } + void AlphaTest(const Id& pointer) { + const Id true_label = OpLabel(); + const Id skip_label = OpLabel(); + Id condition; + switch (specialization.alpha_test_func) { + case VK_COMPARE_OP_NEVER: + condition = Constant(t_float, false); // Never true + break; + case VK_COMPARE_OP_LESS: + condition = OpFOrdLessThan(t_bool, Constant(t_float, specialization.alpha_test_ref), + OpLoad(t_float, pointer)); + break; + case VK_COMPARE_OP_EQUAL: + condition = OpFOrdEqual(t_bool, Constant(t_float, specialization.alpha_test_ref), + OpLoad(t_float, pointer)); + break; + case VK_COMPARE_OP_LESS_OR_EQUAL: + condition = OpFOrdLessThanEqual( + t_bool, Constant(t_float, specialization.alpha_test_ref), OpLoad(t_float, pointer)); + break; + case VK_COMPARE_OP_GREATER: + // Note: requires "Equal" to properly work for ssbu. 
perhaps a precision issue + condition = OpFOrdGreaterThanEqual( + t_bool, Constant(t_float, specialization.alpha_test_ref), OpLoad(t_float, pointer)); + break; + case VK_COMPARE_OP_NOT_EQUAL: + // Note: not accurate when tested against a unit test + // TODO: confirm if used by games + condition = OpFOrdNotEqual(t_bool, Constant(t_float, specialization.alpha_test_ref), + OpLoad(t_float, pointer)); + break; + case VK_COMPARE_OP_GREATER_OR_EQUAL: + condition = OpFOrdGreaterThanEqual( + t_bool, Constant(t_float, specialization.alpha_test_ref), OpLoad(t_float, pointer)); + break; + case VK_COMPARE_OP_ALWAYS: + condition = Constant(t_bool, true); // Always true + break; + default: + LOG_WARNING(Render_Vulkan, "Unimplemented alpha test function"); + condition = Constant(t_bool, true); // Always true + break; + } + OpBranchConditional(condition, true_label, skip_label); + AddLabel(true_label); + OpKill(); + AddLabel(skip_label); + } + void PreExit() { if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) { const u32 position_index = out_indices.position.value(); @@ -2097,8 +2146,6 @@ private: UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - // TODO(Rodrigo): Alpha testing - // Write the color outputs using the data in the shader registers, disabled // rendertargets/components are skipped in the register assignment. 
u32 current_reg = 0; @@ -2110,6 +2157,9 @@ private: } const Id pointer = AccessElement(t_out_float, frag_colors[rt], component); OpStore(pointer, SafeGetRegister(current_reg)); + if (specialization.alpha_test_enabled && component == 3) { + AlphaTest(pointer); + } ++current_reg; } } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 2b0e90396..ddbcb0b41 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -95,6 +95,9 @@ struct Specialization final { std::bitset enabled_attributes; std::array attribute_types{}; bool ndc_minus_one_to_one{}; + bool alpha_test_enabled{}; + float alpha_test_ref{}; + u8 alpha_test_func{}; }; // Old gcc versions don't consider this trivially copyable. // static_assert(std::is_trivially_copyable_v); -- cgit v1.2.3 From 1dbf71ceb3b84691101228a2981cafed477b27e9 Mon Sep 17 00:00:00 2001 From: ameerj Date: Thu, 19 Nov 2020 02:25:37 -0500 Subject: Address PR feedback from Rein --- .../renderer_vulkan/fixed_pipeline_state.cpp | 5 ++- .../renderer_vulkan/fixed_pipeline_state.h | 3 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 10 ++--- .../renderer_vulkan/vk_shader_decompiler.cpp | 50 ++++++++++------------ .../renderer_vulkan/vk_shader_decompiler.h | 3 +- 5 files changed, 31 insertions(+), 40 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 1b9611c59..192828300 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -61,8 +61,9 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta topology.Assign(regs.draw.topology); alpha_raw = 0; - alpha_test_enabled.Assign(regs.alpha_test_enabled); - alpha_test_func.Assign(PackComparisonOp(regs.alpha_test_func)); + const auto 
test_func = + regs.alpha_test_enabled == 1 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; + alpha_test_func.Assign(PackComparisonOp(test_func)); std::memcpy(&alpha_test_ref, &regs.alpha_test_ref, sizeof(u32)); // TODO: C++20 std::bit_cast std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 9a45ec6b7..42480e8d0 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -188,11 +188,10 @@ struct FixedPipelineState { BitField<24, 4, Maxwell::PrimitiveTopology> topology; }; - u32 alpha_test_ref; /// < Alpha test reference + u32 alpha_test_ref; ///< Alpha test reference value union { u32 alpha_raw; BitField<0, 3, u32> alpha_test_func; - BitField<3, 1, u32> alpha_test_enabled; }; u32 point_size; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9ccf5d011..a66a841fb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -345,12 +345,10 @@ VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; // Alpha test - if (fixed_state.alpha_test_enabled == 1) { - specialization.alpha_test_enabled = true; - specialization.alpha_test_func = static_cast(fixed_state.alpha_test_func); - // memcpy from u32 to float TODO: C++20 std::bit_cast - std::memcpy(&specialization.alpha_test_ref, &fixed_state.alpha_test_ref, sizeof(float)); - } + specialization.alpha_test_func = + FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); + // memcpy from u32 to float TODO: C++20 std::bit_cast + std::memcpy(&specialization.alpha_test_ref, &fixed_state.alpha_test_ref, sizeof(float)); SPIRVProgram program;
std::vector bindings; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 356d2ab7a..81550bc96 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -2075,48 +2075,42 @@ private: return {}; } - void AlphaTest(const Id& pointer) { + void AlphaTest(Id pointer) { const Id true_label = OpLabel(); const Id skip_label = OpLabel(); + const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref); + const Id alpha_value = OpLoad(t_float, pointer); Id condition; + using Compare = Maxwell::ComparisonOp; switch (specialization.alpha_test_func) { - case VK_COMPARE_OP_NEVER: - condition = Constant(t_float, false); // Never true + case Compare::NeverOld: + condition = v_false; // Never true break; - case VK_COMPARE_OP_LESS: - condition = OpFOrdLessThan(t_bool, Constant(t_float, specialization.alpha_test_ref), - OpLoad(t_float, pointer)); + case Compare::LessOld: + condition = OpFOrdLessThan(t_bool, alpha_reference, alpha_value); break; - case VK_COMPARE_OP_EQUAL: - condition = OpFOrdEqual(t_bool, Constant(t_float, specialization.alpha_test_ref), - OpLoad(t_float, pointer)); + case Compare::EqualOld: + condition = OpFOrdEqual(t_bool, alpha_reference, alpha_value); break; - case VK_COMPARE_OP_LESS_OR_EQUAL: - condition = OpFOrdLessThanEqual( - t_bool, Constant(t_float, specialization.alpha_test_ref), OpLoad(t_float, pointer)); + case Compare::LessEqualOld: + condition = OpFOrdLessThanEqual(t_bool, alpha_reference, alpha_value); break; - case VK_COMPARE_OP_GREATER: + case Compare::GreaterOld: // Note: requires "Equal" to properly work for ssbu. 
perhaps a precision issue - condition = OpFOrdGreaterThanEqual( - t_bool, Constant(t_float, specialization.alpha_test_ref), OpLoad(t_float, pointer)); + condition = OpFOrdGreaterThanEqual(t_bool, alpha_reference, alpha_value); break; - case VK_COMPARE_OP_NOT_EQUAL: + case Compare::NotEqualOld: // Note: not accurate when tested against a unit test // TODO: confirm if used by games - condition = OpFOrdNotEqual(t_bool, Constant(t_float, specialization.alpha_test_ref), - OpLoad(t_float, pointer)); + condition = OpFOrdNotEqual(t_bool, alpha_reference, alpha_value); break; - case VK_COMPARE_OP_GREATER_OR_EQUAL: - condition = OpFOrdGreaterThanEqual( - t_bool, Constant(t_float, specialization.alpha_test_ref), OpLoad(t_float, pointer)); - break; - case VK_COMPARE_OP_ALWAYS: - condition = Constant(t_bool, true); // Always true + case Compare::GreaterEqualOld: + condition = OpFOrdGreaterThanEqual(t_bool, alpha_reference, alpha_value); break; + case Compare::AlwaysOld: + return; default: - LOG_WARNING(Render_Vulkan, "Unimplemented alpha test function"); - condition = Constant(t_bool, true); // Always true - break; + UNREACHABLE(); } OpBranchConditional(condition, true_label, skip_label); AddLabel(true_label); @@ -2157,7 +2151,7 @@ private: } const Id pointer = AccessElement(t_out_float, frag_colors[rt], component); OpStore(pointer, SafeGetRegister(current_reg)); - if (specialization.alpha_test_enabled && component == 3) { + if (rt == 0 && component == 3) { AlphaTest(pointer); } ++current_reg; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index ddbcb0b41..cd3d0a415 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -95,9 +95,8 @@ struct Specialization final { std::bitset enabled_attributes; std::array attribute_types{}; bool ndc_minus_one_to_one{}; - bool alpha_test_enabled{}; float alpha_test_ref{}; - u8 alpha_test_func{}; 
+ Maxwell::ComparisonOp alpha_test_func{}; }; // Old gcc versions don't consider this trivially copyable. // static_assert(std::is_trivially_copyable_v); -- cgit v1.2.3 From e87670ee48c896ba029a11ad590234e00260f875 Mon Sep 17 00:00:00 2001 From: ameerj Date: Wed, 25 Nov 2020 00:33:20 -0500 Subject: Refactor MaxwellToSpirvComparison. Use Common::BitCast Co-Authored-By: Rodrigo Locatti --- .../renderer_vulkan/fixed_pipeline_state.cpp | 5 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 4 +- .../renderer_vulkan/vk_shader_decompiler.cpp | 56 +++++++++++----------- 3 files changed, 34 insertions(+), 31 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 192828300..fffae528e 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -8,6 +8,7 @@ #include +#include "common/bit_cast.h" #include "common/cityhash.h" #include "common/common_types.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" @@ -64,9 +65,9 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta const auto test_func = regs.alpha_test_enabled == 1 ? 
regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); - std::memcpy(&alpha_test_ref, &regs.alpha_test_ref, sizeof(u32)); // TODO: C++20 std::bit_cast + alpha_test_ref = Common::BitCast(regs.alpha_test_ref); - std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast + point_size = Common::BitCast(regs.point_size); for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { binding_divisors[index] = diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a66a841fb..f9efe526d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -7,6 +7,7 @@ #include #include +#include "common/bit_cast.h" #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" @@ -347,8 +348,7 @@ VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { // Alpha test specialization.alpha_test_func = FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); - // memcpy from u32 to float TODO: C++20 std::bit_cast - std::memcpy(&specialization.alpha_test_ref, &fixed_state.alpha_test_ref, sizeof(float)); + specialization.alpha_test_ref = Common::BitCast(fixed_state.alpha_test_ref); SPIRVProgram program; std::vector bindings; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 81550bc96..d6685cd12 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -2075,47 +2075,49 @@ private: return {}; } - void AlphaTest(Id pointer) { - const Id true_label = OpLabel(); - const Id skip_label = OpLabel(); - const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref); - const Id alpha_value = OpLoad(t_float, pointer); - Id condition; + Id
MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) { using Compare = Maxwell::ComparisonOp; - switch (specialization.alpha_test_func) { + switch (compare_op) { case Compare::NeverOld: - condition = v_false; // Never true - break; + return v_false; // Never let the test pass case Compare::LessOld: - condition = OpFOrdLessThan(t_bool, alpha_reference, alpha_value); - break; + return OpFOrdLessThan(t_bool, operand_1, operand_2); case Compare::EqualOld: - condition = OpFOrdEqual(t_bool, alpha_reference, alpha_value); - break; + // Note: not accurate when tested against a unit test + // TODO: confirm if used by games + return OpFOrdEqual(t_bool, operand_1, operand_2); case Compare::LessEqualOld: - condition = OpFOrdLessThanEqual(t_bool, alpha_reference, alpha_value); - break; + return OpFOrdLessThanEqual(t_bool, operand_1, operand_2); case Compare::GreaterOld: - // Note: requires "Equal" to properly work for ssbu. perhaps a precision issue - condition = OpFOrdGreaterThanEqual(t_bool, alpha_reference, alpha_value); - break; + return OpFOrdGreaterThan(t_bool, operand_1, operand_2); case Compare::NotEqualOld: // Note: not accurate when tested against a unit test // TODO: confirm if used by games - condition = OpFOrdNotEqual(t_bool, alpha_reference, alpha_value); - break; + return OpFOrdNotEqual(t_bool, operand_1, operand_2); case Compare::GreaterEqualOld: - condition = OpFOrdGreaterThanEqual(t_bool, alpha_reference, alpha_value); - break; - case Compare::AlwaysOld: - return; + return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2); default: UNREACHABLE(); } - OpBranchConditional(condition, true_label, skip_label); - AddLabel(true_label); + } + + void AlphaTest(Id pointer) { + if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) { + return; + } + + const Id true_label = OpLabel(); + const Id discard_label = OpLabel(); + const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref); + const Id 
alpha_value = OpLoad(t_float, pointer); + + const Id condition = + MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference); + + OpBranchConditional(condition, true_label, discard_label); + AddLabel(discard_label); OpKill(); - AddLabel(skip_label); + AddLabel(true_label); } void PreExit() { -- cgit v1.2.3 From d52ee6d0a7e5e588e57603d7a62604ba6f58db83 Mon Sep 17 00:00:00 2001 From: ameerj Date: Wed, 25 Nov 2020 14:46:08 -0500 Subject: cleanup unneeded comments and newlines --- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 6 ------ 1 file changed, 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index d6685cd12..1c52f40bb 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -2083,16 +2083,12 @@ private: case Compare::LessOld: return OpFOrdLessThan(t_bool, operand_1, operand_2); case Compare::EqualOld: - // Note: not accurate when tested against a unit test - // TODO: confirm if used by games return OpFOrdEqual(t_bool, operand_1, operand_2); case Compare::LessEqualOld: return OpFOrdLessThanEqual(t_bool, operand_1, operand_2); case Compare::GreaterOld: return OpFOrdGreaterThan(t_bool, operand_1, operand_2); case Compare::NotEqualOld: - // Note: not accurate when tested against a unit test - // TODO: confirm if used by games return OpFOrdNotEqual(t_bool, operand_1, operand_2); case Compare::GreaterEqualOld: return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2); @@ -2105,12 +2101,10 @@ private: if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) { return; } - const Id true_label = OpLabel(); const Id discard_label = OpLabel(); const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref); const Id alpha_value = OpLoad(t_float, pointer); - const Id condition = 
MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference); -- cgit v1.2.3 From eab041866b7c766aa38258aecef8a00c03612459 Mon Sep 17 00:00:00 2001 From: ameerj Date: Wed, 25 Nov 2020 17:10:44 -0500 Subject: Queue decoded frames, cleanup decoders --- src/video_core/command_classes/codecs/codec.cpp | 30 +- src/video_core/command_classes/codecs/codec.h | 11 +- src/video_core/command_classes/codecs/h264.cpp | 2 +- src/video_core/command_classes/codecs/h264.h | 4 +- src/video_core/command_classes/codecs/vp9.cpp | 333 +++++++++------------- src/video_core/command_classes/codecs/vp9.h | 7 +- src/video_core/command_classes/codecs/vp9_types.h | 154 +++------- src/video_core/command_classes/nvdec.cpp | 6 +- src/video_core/command_classes/nvdec.h | 3 +- src/video_core/command_classes/vic.cpp | 15 +- 10 files changed, 227 insertions(+), 338 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 1adf3cd13..1a19341c8 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -18,6 +18,11 @@ extern "C" { namespace Tegra { +void av_frame_deleter(AVFrame* ptr) { + av_frame_unref(ptr); + av_free(ptr); +} + Codec::Codec(GPU& gpu_) : gpu(gpu_), h264_decoder(std::make_unique(gpu)), vp9_decoder(std::make_unique(gpu)) {} @@ -27,7 +32,9 @@ Codec::~Codec() { return; } // Free libav memory + AVFrame* av_frame{nullptr}; avcodec_send_packet(av_codec_ctx, nullptr); + av_frame = av_frame_alloc(); avcodec_receive_frame(av_codec_ctx, av_frame); avcodec_flush_buffers(av_codec_ctx); @@ -60,7 +67,7 @@ void Codec::Decode() { } av_codec_ctx = avcodec_alloc_context3(av_codec); - av_frame = av_frame_alloc(); + av_codec_ctx->refcounted_frames = 1; av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); // TODO(ameerj): libavcodec gpu hw acceleration @@ -68,8 +75,6 @@ void Codec::Decode() { const auto av_error 
= avcodec_open2(av_codec_ctx, av_codec, nullptr); if (av_error < 0) { LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); - av_frame_unref(av_frame); - av_free(av_frame); avcodec_close(av_codec_ctx); return; } @@ -96,16 +101,21 @@ void Codec::Decode() { if (!vp9_hidden_frame) { // Only receive/store visible frames - avcodec_receive_frame(av_codec_ctx, av_frame); + AVFramePtr frame = AVFramePtr{av_frame_alloc(), av_frame_deleter}; + avcodec_receive_frame(av_codec_ctx, frame.get()); + av_frames.push(std::move(frame)); } } -AVFrame* Codec::GetCurrentFrame() { - return av_frame; -} - -const AVFrame* Codec::GetCurrentFrame() const { - return av_frame; +AVFramePtr Codec::GetCurrentFrame() { + // Sometimes VIC will request more frames than have been decoded. + // in this case, return a nullptr and don't overwrite previous frame data + if (av_frames.size() > 0) { + AVFramePtr frame = std::move(av_frames.front()); + av_frames.pop(); + return frame; + } + return AVFramePtr{nullptr, av_frame_deleter}; } NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 5bbe6a332..c26b59fde 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include "common/common_types.h" #include "video_core/command_classes/nvdec_common.h" @@ -22,6 +23,9 @@ namespace Tegra { class GPU; struct VicRegisters; +void av_frame_deleter(AVFrame* ptr); +using AVFramePtr = std::unique_ptr; + namespace Decoder { class H264; class VP9; @@ -41,9 +45,8 @@ public: /// Call decoders to construct headers, decode AVFrame with ffmpeg void Decode(); - /// Returns most recently decoded frame - [[nodiscard]] AVFrame* GetCurrentFrame(); - [[nodiscard]] const AVFrame* GetCurrentFrame() const; + /// Returns next decoded frame + [[nodiscard]] AVFramePtr GetCurrentFrame(); /// Returns the value of 
current_codec [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; @@ -54,13 +57,13 @@ private: AVCodec* av_codec{nullptr}; AVCodecContext* av_codec_ctx{nullptr}; - AVFrame* av_frame{nullptr}; GPU& gpu; std::unique_ptr h264_decoder; std::unique_ptr vp9_decoder; NvdecCommon::NvdecRegisters state{}; + std::queue av_frames{}; }; } // namespace Tegra diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index 33e063e20..65bbeac78 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -43,7 +43,7 @@ H264::H264(GPU& gpu_) : gpu(gpu_) {} H264::~H264() = default; -const std::vector& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, +const std::vector& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state, bool is_first_frame) { H264DecoderContext context{}; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index 273449495..0f3a1d9f3 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h @@ -74,8 +74,8 @@ public: ~H264(); /// Compose the H264 header of the frame for FFmpeg decoding - [[nodiscard]] const std::vector& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, - bool is_first_frame = false); + [[nodiscard]] const std::vector& ComposeFrameHeader( + const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); private: struct H264ParameterSet { diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index ab44fdc9e..31e00c27d 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -23,122 +23,102 @@ constexpr Vp9EntropyProbs default_probs{ 222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0, }, 
.coef_probs{ - 195, 29, 183, 0, 84, 49, 136, 0, 8, 42, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 31, 107, 169, 0, 35, 99, 159, 0, 17, 82, 140, 0, 8, 66, 114, 0, - 2, 44, 76, 0, 1, 19, 32, 0, 40, 132, 201, 0, 29, 114, 187, 0, 13, 91, 157, 0, - 7, 75, 127, 0, 3, 58, 95, 0, 1, 28, 47, 0, 69, 142, 221, 0, 42, 122, 201, 0, - 15, 91, 159, 0, 6, 67, 121, 0, 1, 42, 77, 0, 1, 17, 31, 0, 102, 148, 228, 0, - 67, 117, 204, 0, 17, 82, 154, 0, 6, 59, 114, 0, 2, 39, 75, 0, 1, 15, 29, 0, - 156, 57, 233, 0, 119, 57, 212, 0, 58, 48, 163, 0, 29, 40, 124, 0, 12, 30, 81, 0, - 3, 12, 31, 0, 191, 107, 226, 0, 124, 117, 204, 0, 25, 99, 155, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 29, 148, 210, 0, 37, 126, 194, 0, 8, 93, 157, 0, - 2, 68, 118, 0, 1, 39, 69, 0, 1, 17, 33, 0, 41, 151, 213, 0, 27, 123, 193, 0, - 3, 82, 144, 0, 1, 58, 105, 0, 1, 32, 60, 0, 1, 13, 26, 0, 59, 159, 220, 0, - 23, 126, 198, 0, 4, 88, 151, 0, 1, 66, 114, 0, 1, 38, 71, 0, 1, 18, 34, 0, - 114, 136, 232, 0, 51, 114, 207, 0, 11, 83, 155, 0, 3, 56, 105, 0, 1, 33, 65, 0, - 1, 17, 34, 0, 149, 65, 234, 0, 121, 57, 215, 0, 61, 49, 166, 0, 28, 36, 114, 0, - 12, 25, 76, 0, 3, 16, 42, 0, 214, 49, 220, 0, 132, 63, 188, 0, 42, 65, 137, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 137, 221, 0, 104, 131, 216, 0, - 49, 111, 192, 0, 21, 87, 155, 0, 2, 49, 87, 0, 1, 16, 28, 0, 89, 163, 230, 0, - 90, 137, 220, 0, 29, 100, 183, 0, 10, 70, 135, 0, 2, 42, 81, 0, 1, 17, 33, 0, - 108, 167, 237, 0, 55, 133, 222, 0, 15, 97, 179, 0, 4, 72, 135, 0, 1, 45, 85, 0, - 1, 19, 38, 0, 124, 146, 240, 0, 66, 124, 224, 0, 17, 88, 175, 0, 4, 58, 122, 0, - 1, 36, 75, 0, 1, 18, 37, 0, 141, 79, 241, 0, 126, 70, 227, 0, 66, 58, 182, 0, - 30, 44, 136, 0, 12, 34, 96, 0, 2, 20, 47, 0, 229, 99, 249, 0, 143, 111, 235, 0, - 46, 109, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 158, 236, 0, - 94, 146, 224, 0, 25, 117, 191, 0, 9, 87, 149, 0, 3, 56, 99, 0, 1, 33, 57, 0, - 83, 167, 237, 0, 68, 145, 222, 0, 10, 103, 177, 0, 2, 72, 131, 0, 1, 41, 79, 0, - 1, 20, 
39, 0, 99, 167, 239, 0, 47, 141, 224, 0, 10, 104, 178, 0, 2, 73, 133, 0, - 1, 44, 85, 0, 1, 22, 47, 0, 127, 145, 243, 0, 71, 129, 228, 0, 17, 93, 177, 0, - 3, 61, 124, 0, 1, 41, 84, 0, 1, 21, 52, 0, 157, 78, 244, 0, 140, 72, 231, 0, - 69, 58, 184, 0, 31, 44, 137, 0, 14, 38, 105, 0, 8, 23, 61, 0, 125, 34, 187, 0, - 52, 41, 133, 0, 6, 31, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 37, 109, 153, 0, 51, 102, 147, 0, 23, 87, 128, 0, 8, 67, 101, 0, 1, 41, 63, 0, - 1, 19, 29, 0, 31, 154, 185, 0, 17, 127, 175, 0, 6, 96, 145, 0, 2, 73, 114, 0, - 1, 51, 82, 0, 1, 28, 45, 0, 23, 163, 200, 0, 10, 131, 185, 0, 2, 93, 148, 0, - 1, 67, 111, 0, 1, 41, 69, 0, 1, 14, 24, 0, 29, 176, 217, 0, 12, 145, 201, 0, - 3, 101, 156, 0, 1, 69, 111, 0, 1, 39, 63, 0, 1, 14, 23, 0, 57, 192, 233, 0, - 25, 154, 215, 0, 6, 109, 167, 0, 3, 78, 118, 0, 1, 48, 69, 0, 1, 21, 29, 0, - 202, 105, 245, 0, 108, 106, 216, 0, 18, 90, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 33, 172, 219, 0, 64, 149, 206, 0, 14, 117, 177, 0, 5, 90, 141, 0, - 2, 61, 95, 0, 1, 37, 57, 0, 33, 179, 220, 0, 11, 140, 198, 0, 1, 89, 148, 0, - 1, 60, 104, 0, 1, 33, 57, 0, 1, 12, 21, 0, 30, 181, 221, 0, 8, 141, 198, 0, - 1, 87, 145, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 20, 0, 32, 186, 224, 0, - 7, 142, 198, 0, 1, 86, 143, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 22, 0, - 57, 192, 227, 0, 20, 143, 204, 0, 3, 96, 154, 0, 1, 68, 112, 0, 1, 42, 69, 0, - 1, 19, 32, 0, 212, 35, 215, 0, 113, 47, 169, 0, 29, 48, 105, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 74, 129, 203, 0, 106, 120, 203, 0, 49, 107, 178, 0, - 19, 84, 144, 0, 4, 50, 84, 0, 1, 15, 25, 0, 71, 172, 217, 0, 44, 141, 209, 0, - 15, 102, 173, 0, 6, 76, 133, 0, 2, 51, 89, 0, 1, 24, 42, 0, 64, 185, 231, 0, - 31, 148, 216, 0, 8, 103, 175, 0, 3, 74, 131, 0, 1, 46, 81, 0, 1, 18, 30, 0, - 65, 196, 235, 0, 25, 157, 221, 0, 5, 105, 174, 0, 1, 67, 120, 0, 1, 38, 69, 0, - 1, 15, 30, 0, 65, 204, 238, 0, 30, 156, 224, 0, 7, 107, 177, 0, 2, 70, 124, 0, - 1, 42, 73, 0, 1, 18, 34, 0, 225, 86, 
251, 0, 144, 104, 235, 0, 42, 99, 181, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 175, 239, 0, 112, 165, 229, 0, - 29, 136, 200, 0, 12, 103, 162, 0, 6, 77, 123, 0, 2, 53, 84, 0, 75, 183, 239, 0, - 30, 155, 221, 0, 3, 106, 171, 0, 1, 74, 128, 0, 1, 44, 76, 0, 1, 17, 28, 0, - 73, 185, 240, 0, 27, 159, 222, 0, 2, 107, 172, 0, 1, 75, 127, 0, 1, 42, 73, 0, - 1, 17, 29, 0, 62, 190, 238, 0, 21, 159, 222, 0, 2, 107, 172, 0, 1, 72, 122, 0, - 1, 40, 71, 0, 1, 18, 32, 0, 61, 199, 240, 0, 27, 161, 226, 0, 4, 113, 180, 0, - 1, 76, 129, 0, 1, 46, 80, 0, 1, 23, 41, 0, 7, 27, 153, 0, 5, 30, 95, 0, - 1, 16, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 75, 127, 0, - 57, 75, 124, 0, 27, 67, 108, 0, 10, 54, 86, 0, 1, 33, 52, 0, 1, 12, 18, 0, - 43, 125, 151, 0, 26, 108, 148, 0, 7, 83, 122, 0, 2, 59, 89, 0, 1, 38, 60, 0, - 1, 17, 27, 0, 23, 144, 163, 0, 13, 112, 154, 0, 2, 75, 117, 0, 1, 50, 81, 0, - 1, 31, 51, 0, 1, 14, 23, 0, 18, 162, 185, 0, 6, 123, 171, 0, 1, 78, 125, 0, - 1, 51, 86, 0, 1, 31, 54, 0, 1, 14, 23, 0, 15, 199, 227, 0, 3, 150, 204, 0, - 1, 91, 146, 0, 1, 55, 95, 0, 1, 30, 53, 0, 1, 11, 20, 0, 19, 55, 240, 0, - 19, 59, 196, 0, 3, 52, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 41, 166, 207, 0, 104, 153, 199, 0, 31, 123, 181, 0, 14, 101, 152, 0, 5, 72, 106, 0, - 1, 36, 52, 0, 35, 176, 211, 0, 12, 131, 190, 0, 2, 88, 144, 0, 1, 60, 101, 0, - 1, 36, 60, 0, 1, 16, 28, 0, 28, 183, 213, 0, 8, 134, 191, 0, 1, 86, 142, 0, - 1, 56, 96, 0, 1, 30, 53, 0, 1, 12, 20, 0, 20, 190, 215, 0, 4, 135, 192, 0, - 1, 84, 139, 0, 1, 53, 91, 0, 1, 28, 49, 0, 1, 11, 20, 0, 13, 196, 216, 0, - 2, 137, 192, 0, 1, 86, 143, 0, 1, 57, 99, 0, 1, 32, 56, 0, 1, 13, 24, 0, - 211, 29, 217, 0, 96, 47, 156, 0, 22, 43, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 78, 120, 193, 0, 111, 116, 186, 0, 46, 102, 164, 0, 15, 80, 128, 0, - 2, 49, 76, 0, 1, 18, 28, 0, 71, 161, 203, 0, 42, 132, 192, 0, 10, 98, 150, 0, - 3, 69, 109, 0, 1, 44, 70, 0, 1, 18, 29, 0, 57, 186, 211, 0, 30, 140, 196, 0, - 4, 93, 146, 0, 1, 
62, 102, 0, 1, 38, 65, 0, 1, 16, 27, 0, 47, 199, 217, 0, - 14, 145, 196, 0, 1, 88, 142, 0, 1, 57, 98, 0, 1, 36, 62, 0, 1, 15, 26, 0, - 26, 219, 229, 0, 5, 155, 207, 0, 1, 94, 151, 0, 1, 60, 104, 0, 1, 36, 62, 0, - 1, 16, 28, 0, 233, 29, 248, 0, 146, 47, 220, 0, 43, 52, 140, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 100, 163, 232, 0, 179, 161, 222, 0, 63, 142, 204, 0, - 37, 113, 174, 0, 26, 89, 137, 0, 18, 68, 97, 0, 85, 181, 230, 0, 32, 146, 209, 0, - 7, 100, 164, 0, 3, 71, 121, 0, 1, 45, 77, 0, 1, 18, 30, 0, 65, 187, 230, 0, - 20, 148, 207, 0, 2, 97, 159, 0, 1, 68, 116, 0, 1, 40, 70, 0, 1, 14, 29, 0, - 40, 194, 227, 0, 8, 147, 204, 0, 1, 94, 155, 0, 1, 65, 112, 0, 1, 39, 66, 0, - 1, 14, 26, 0, 16, 208, 228, 0, 3, 151, 207, 0, 1, 98, 160, 0, 1, 67, 117, 0, - 1, 41, 74, 0, 1, 17, 31, 0, 17, 38, 140, 0, 7, 34, 80, 0, 1, 17, 29, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 75, 128, 0, 41, 76, 128, 0, - 26, 66, 116, 0, 12, 52, 94, 0, 2, 32, 55, 0, 1, 10, 16, 0, 50, 127, 154, 0, - 37, 109, 152, 0, 16, 82, 121, 0, 5, 59, 85, 0, 1, 35, 54, 0, 1, 13, 20, 0, - 40, 142, 167, 0, 17, 110, 157, 0, 2, 71, 112, 0, 1, 44, 72, 0, 1, 27, 45, 0, - 1, 11, 17, 0, 30, 175, 188, 0, 9, 124, 169, 0, 1, 74, 116, 0, 1, 48, 78, 0, - 1, 30, 49, 0, 1, 11, 18, 0, 10, 222, 223, 0, 2, 150, 194, 0, 1, 83, 128, 0, - 1, 48, 79, 0, 1, 27, 45, 0, 1, 11, 17, 0, 36, 41, 235, 0, 29, 36, 193, 0, - 10, 27, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 165, 222, 0, - 177, 162, 215, 0, 110, 135, 195, 0, 57, 113, 168, 0, 23, 83, 120, 0, 10, 49, 61, 0, - 85, 190, 223, 0, 36, 139, 200, 0, 5, 90, 146, 0, 1, 60, 103, 0, 1, 38, 65, 0, - 1, 18, 30, 0, 72, 202, 223, 0, 23, 141, 199, 0, 2, 86, 140, 0, 1, 56, 97, 0, - 1, 36, 61, 0, 1, 16, 27, 0, 55, 218, 225, 0, 13, 145, 200, 0, 1, 86, 141, 0, - 1, 57, 99, 0, 1, 35, 61, 0, 1, 13, 22, 0, 15, 235, 212, 0, 1, 132, 184, 0, - 1, 84, 139, 0, 1, 57, 97, 0, 1, 34, 56, 0, 1, 14, 23, 0, 181, 21, 201, 0, - 61, 37, 123, 0, 10, 38, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 47, 
106, 172, 0, 95, 104, 173, 0, 42, 93, 159, 0, 18, 77, 131, 0, 4, 50, 81, 0, - 1, 17, 23, 0, 62, 147, 199, 0, 44, 130, 189, 0, 28, 102, 154, 0, 18, 75, 115, 0, - 2, 44, 65, 0, 1, 12, 19, 0, 55, 153, 210, 0, 24, 130, 194, 0, 3, 93, 146, 0, - 1, 61, 97, 0, 1, 31, 50, 0, 1, 10, 16, 0, 49, 186, 223, 0, 17, 148, 204, 0, - 1, 96, 142, 0, 1, 53, 83, 0, 1, 26, 44, 0, 1, 11, 17, 0, 13, 217, 212, 0, - 2, 136, 180, 0, 1, 78, 124, 0, 1, 50, 83, 0, 1, 29, 49, 0, 1, 14, 23, 0, - 197, 13, 247, 0, 82, 17, 222, 0, 25, 17, 162, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 126, 186, 247, 0, 234, 191, 243, 0, 176, 177, 234, 0, 104, 158, 220, 0, - 66, 128, 186, 0, 55, 90, 137, 0, 111, 197, 242, 0, 46, 158, 219, 0, 9, 104, 171, 0, - 2, 65, 125, 0, 1, 44, 80, 0, 1, 17, 91, 0, 104, 208, 245, 0, 39, 168, 224, 0, - 3, 109, 162, 0, 1, 79, 124, 0, 1, 50, 102, 0, 1, 43, 102, 0, 84, 220, 246, 0, - 31, 177, 231, 0, 2, 115, 180, 0, 1, 79, 134, 0, 1, 55, 77, 0, 1, 60, 79, 0, - 43, 243, 240, 0, 8, 180, 217, 0, 1, 115, 166, 0, 1, 84, 121, 0, 1, 51, 67, 0, - 1, 16, 6, 0, + 195, 29, 183, 84, 49, 136, 8, 42, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 31, 107, 169, 35, 99, 159, 17, 82, 140, 8, 66, 114, 2, 44, 76, 1, 19, 32, + 40, 132, 201, 29, 114, 187, 13, 91, 157, 7, 75, 127, 3, 58, 95, 1, 28, 47, + 69, 142, 221, 42, 122, 201, 15, 91, 159, 6, 67, 121, 1, 42, 77, 1, 17, 31, + 102, 148, 228, 67, 117, 204, 17, 82, 154, 6, 59, 114, 2, 39, 75, 1, 15, 29, + 156, 57, 233, 119, 57, 212, 58, 48, 163, 29, 40, 124, 12, 30, 81, 3, 12, 31, + 191, 107, 226, 124, 117, 204, 25, 99, 155, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 29, 148, 210, 37, 126, 194, 8, 93, 157, 2, 68, 118, 1, 39, 69, 1, 17, 33, + 41, 151, 213, 27, 123, 193, 3, 82, 144, 1, 58, 105, 1, 32, 60, 1, 13, 26, + 59, 159, 220, 23, 126, 198, 4, 88, 151, 1, 66, 114, 1, 38, 71, 1, 18, 34, + 114, 136, 232, 51, 114, 207, 11, 83, 155, 3, 56, 105, 1, 33, 65, 1, 17, 34, + 149, 65, 234, 121, 57, 215, 61, 49, 166, 28, 36, 114, 12, 25, 76, 3, 16, 42, + 214, 49, 220, 132, 63, 188, 42, 65, 
137, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 85, 137, 221, 104, 131, 216, 49, 111, 192, 21, 87, 155, 2, 49, 87, 1, 16, 28, + 89, 163, 230, 90, 137, 220, 29, 100, 183, 10, 70, 135, 2, 42, 81, 1, 17, 33, + 108, 167, 237, 55, 133, 222, 15, 97, 179, 4, 72, 135, 1, 45, 85, 1, 19, 38, + 124, 146, 240, 66, 124, 224, 17, 88, 175, 4, 58, 122, 1, 36, 75, 1, 18, 37, + 141, 79, 241, 126, 70, 227, 66, 58, 182, 30, 44, 136, 12, 34, 96, 2, 20, 47, + 229, 99, 249, 143, 111, 235, 46, 109, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 82, 158, 236, 94, 146, 224, 25, 117, 191, 9, 87, 149, 3, 56, 99, 1, 33, 57, + 83, 167, 237, 68, 145, 222, 10, 103, 177, 2, 72, 131, 1, 41, 79, 1, 20, 39, + 99, 167, 239, 47, 141, 224, 10, 104, 178, 2, 73, 133, 1, 44, 85, 1, 22, 47, + 127, 145, 243, 71, 129, 228, 17, 93, 177, 3, 61, 124, 1, 41, 84, 1, 21, 52, + 157, 78, 244, 140, 72, 231, 69, 58, 184, 31, 44, 137, 14, 38, 105, 8, 23, 61, + 125, 34, 187, 52, 41, 133, 6, 31, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 37, 109, 153, 51, 102, 147, 23, 87, 128, 8, 67, 101, 1, 41, 63, 1, 19, 29, + 31, 154, 185, 17, 127, 175, 6, 96, 145, 2, 73, 114, 1, 51, 82, 1, 28, 45, + 23, 163, 200, 10, 131, 185, 2, 93, 148, 1, 67, 111, 1, 41, 69, 1, 14, 24, + 29, 176, 217, 12, 145, 201, 3, 101, 156, 1, 69, 111, 1, 39, 63, 1, 14, 23, + 57, 192, 233, 25, 154, 215, 6, 109, 167, 3, 78, 118, 1, 48, 69, 1, 21, 29, + 202, 105, 245, 108, 106, 216, 18, 90, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 33, 172, 219, 64, 149, 206, 14, 117, 177, 5, 90, 141, 2, 61, 95, 1, 37, 57, + 33, 179, 220, 11, 140, 198, 1, 89, 148, 1, 60, 104, 1, 33, 57, 1, 12, 21, + 30, 181, 221, 8, 141, 198, 1, 87, 145, 1, 58, 100, 1, 31, 55, 1, 12, 20, + 32, 186, 224, 7, 142, 198, 1, 86, 143, 1, 58, 100, 1, 31, 55, 1, 12, 22, + 57, 192, 227, 20, 143, 204, 3, 96, 154, 1, 68, 112, 1, 42, 69, 1, 19, 32, + 212, 35, 215, 113, 47, 169, 29, 48, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 74, 129, 203, 106, 120, 203, 49, 107, 178, 19, 84, 144, 4, 50, 84, 1, 15, 25, + 71, 172, 217, 44, 141, 209, 15, 102, 173, 6, 76, 133, 
2, 51, 89, 1, 24, 42, + 64, 185, 231, 31, 148, 216, 8, 103, 175, 3, 74, 131, 1, 46, 81, 1, 18, 30, + 65, 196, 235, 25, 157, 221, 5, 105, 174, 1, 67, 120, 1, 38, 69, 1, 15, 30, + 65, 204, 238, 30, 156, 224, 7, 107, 177, 2, 70, 124, 1, 42, 73, 1, 18, 34, + 225, 86, 251, 144, 104, 235, 42, 99, 181, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 85, 175, 239, 112, 165, 229, 29, 136, 200, 12, 103, 162, 6, 77, 123, 2, 53, 84, + 75, 183, 239, 30, 155, 221, 3, 106, 171, 1, 74, 128, 1, 44, 76, 1, 17, 28, + 73, 185, 240, 27, 159, 222, 2, 107, 172, 1, 75, 127, 1, 42, 73, 1, 17, 29, + 62, 190, 238, 21, 159, 222, 2, 107, 172, 1, 72, 122, 1, 40, 71, 1, 18, 32, + 61, 199, 240, 27, 161, 226, 4, 113, 180, 1, 76, 129, 1, 46, 80, 1, 23, 41, + 7, 27, 153, 5, 30, 95, 1, 16, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 50, 75, 127, 57, 75, 124, 27, 67, 108, 10, 54, 86, 1, 33, 52, 1, 12, 18, + 43, 125, 151, 26, 108, 148, 7, 83, 122, 2, 59, 89, 1, 38, 60, 1, 17, 27, + 23, 144, 163, 13, 112, 154, 2, 75, 117, 1, 50, 81, 1, 31, 51, 1, 14, 23, + 18, 162, 185, 6, 123, 171, 1, 78, 125, 1, 51, 86, 1, 31, 54, 1, 14, 23, + 15, 199, 227, 3, 150, 204, 1, 91, 146, 1, 55, 95, 1, 30, 53, 1, 11, 20, + 19, 55, 240, 19, 59, 196, 3, 52, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 41, 166, 207, 104, 153, 199, 31, 123, 181, 14, 101, 152, 5, 72, 106, 1, 36, 52, + 35, 176, 211, 12, 131, 190, 2, 88, 144, 1, 60, 101, 1, 36, 60, 1, 16, 28, + 28, 183, 213, 8, 134, 191, 1, 86, 142, 1, 56, 96, 1, 30, 53, 1, 12, 20, + 20, 190, 215, 4, 135, 192, 1, 84, 139, 1, 53, 91, 1, 28, 49, 1, 11, 20, + 13, 196, 216, 2, 137, 192, 1, 86, 143, 1, 57, 99, 1, 32, 56, 1, 13, 24, + 211, 29, 217, 96, 47, 156, 22, 43, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 78, 120, 193, 111, 116, 186, 46, 102, 164, 15, 80, 128, 2, 49, 76, 1, 18, 28, + 71, 161, 203, 42, 132, 192, 10, 98, 150, 3, 69, 109, 1, 44, 70, 1, 18, 29, + 57, 186, 211, 30, 140, 196, 4, 93, 146, 1, 62, 102, 1, 38, 65, 1, 16, 27, + 47, 199, 217, 14, 145, 196, 1, 88, 142, 1, 57, 98, 1, 36, 62, 1, 15, 26, + 26, 219, 229, 5, 155, 
207, 1, 94, 151, 1, 60, 104, 1, 36, 62, 1, 16, 28, + 233, 29, 248, 146, 47, 220, 43, 52, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 100, 163, 232, 179, 161, 222, 63, 142, 204, 37, 113, 174, 26, 89, 137, 18, 68, 97, + 85, 181, 230, 32, 146, 209, 7, 100, 164, 3, 71, 121, 1, 45, 77, 1, 18, 30, + 65, 187, 230, 20, 148, 207, 2, 97, 159, 1, 68, 116, 1, 40, 70, 1, 14, 29, + 40, 194, 227, 8, 147, 204, 1, 94, 155, 1, 65, 112, 1, 39, 66, 1, 14, 26, + 16, 208, 228, 3, 151, 207, 1, 98, 160, 1, 67, 117, 1, 41, 74, 1, 17, 31, + 17, 38, 140, 7, 34, 80, 1, 17, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 37, 75, 128, 41, 76, 128, 26, 66, 116, 12, 52, 94, 2, 32, 55, 1, 10, 16, + 50, 127, 154, 37, 109, 152, 16, 82, 121, 5, 59, 85, 1, 35, 54, 1, 13, 20, + 40, 142, 167, 17, 110, 157, 2, 71, 112, 1, 44, 72, 1, 27, 45, 1, 11, 17, + 30, 175, 188, 9, 124, 169, 1, 74, 116, 1, 48, 78, 1, 30, 49, 1, 11, 18, + 10, 222, 223, 2, 150, 194, 1, 83, 128, 1, 48, 79, 1, 27, 45, 1, 11, 17, + 36, 41, 235, 29, 36, 193, 10, 27, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 85, 165, 222, 177, 162, 215, 110, 135, 195, 57, 113, 168, 23, 83, 120, 10, 49, 61, + 85, 190, 223, 36, 139, 200, 5, 90, 146, 1, 60, 103, 1, 38, 65, 1, 18, 30, + 72, 202, 223, 23, 141, 199, 2, 86, 140, 1, 56, 97, 1, 36, 61, 1, 16, 27, + 55, 218, 225, 13, 145, 200, 1, 86, 141, 1, 57, 99, 1, 35, 61, 1, 13, 22, + 15, 235, 212, 1, 132, 184, 1, 84, 139, 1, 57, 97, 1, 34, 56, 1, 14, 23, + 181, 21, 201, 61, 37, 123, 10, 38, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 47, 106, 172, 95, 104, 173, 42, 93, 159, 18, 77, 131, 4, 50, 81, 1, 17, 23, + 62, 147, 199, 44, 130, 189, 28, 102, 154, 18, 75, 115, 2, 44, 65, 1, 12, 19, + 55, 153, 210, 24, 130, 194, 3, 93, 146, 1, 61, 97, 1, 31, 50, 1, 10, 16, + 49, 186, 223, 17, 148, 204, 1, 96, 142, 1, 53, 83, 1, 26, 44, 1, 11, 17, + 13, 217, 212, 2, 136, 180, 1, 78, 124, 1, 50, 83, 1, 29, 49, 1, 14, 23, + 197, 13, 247, 82, 17, 222, 25, 17, 162, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 126, 186, 247, 234, 191, 243, 176, 177, 234, 104, 158, 220, 66, 128, 186, 55, 
90, 137, + 111, 197, 242, 46, 158, 219, 9, 104, 171, 2, 65, 125, 1, 44, 80, 1, 17, 91, + 104, 208, 245, 39, 168, 224, 3, 109, 162, 1, 79, 124, 1, 50, 102, 1, 43, 102, + 84, 220, 246, 31, 177, 231, 2, 115, 180, 1, 79, 134, 1, 55, 77, 1, 60, 79, + 43, 243, 240, 8, 180, 217, 1, 115, 166, 1, 84, 121, 1, 51, 67, 1, 16, 6, }, .switchable_interp_prob{235, 162, 36, 255, 34, 3, 149, 144}, .inter_mode_prob{ @@ -322,39 +302,23 @@ bool VP9::WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test) { } void VP9::WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode, - const std::array& new_prob, - const std::array& old_prob) { - // Note: There's 1 byte added on each packet for alignment, - // this byte is ignored when doing updates. - constexpr s32 block_bytes = 2 * 2 * 6 * 6 * 4; - - const auto needs_update = [&](s32 base_index) -> bool { - s32 index = base_index; - for (s32 i = 0; i < 2; i++) { - for (s32 j = 0; j < 2; j++) { - for (s32 k = 0; k < 6; k++) { - for (s32 l = 0; l < 6; l++) { - if (new_prob[index + 0] != old_prob[index + 0] || - new_prob[index + 1] != old_prob[index + 1] || - new_prob[index + 2] != old_prob[index + 2]) { - return true; - } - - index += 4; - } - } - } - } - return false; + const std::array& new_prob, + const std::array& old_prob) { + constexpr u32 block_bytes = 2 * 2 * 6 * 6 * 3; + + const auto needs_update = [&](u32 base_index) -> bool { + return !std::equal(new_prob.begin() + base_index, + new_prob.begin() + base_index + block_bytes, + old_prob.begin() + base_index); }; - for (s32 block_index = 0; block_index < 4; block_index++) { - const s32 base_index = block_index * block_bytes; + for (u32 block_index = 0; block_index < 4; block_index++) { + const u32 base_index = block_index * block_bytes; const bool update = needs_update(base_index); writer.Write(update); if (update) { - s32 index = base_index; + u32 index = base_index; for (s32 i = 0; i < 2; i++) { for (s32 j = 0; j < 2; j++) { for (s32 k = 0; k < 6; k++) { @@ -367,14 +331,13 
@@ void VP9::WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode, WriteProbabilityUpdate(writer, new_prob[index + 2], old_prob[index + 2]); } - index += 4; + index += 3; } } } } } - - if (block_index == tx_mode) { + if (block_index == static_cast(tx_mode)) { break; } } @@ -392,7 +355,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { PictureInfo picture_info{}; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); - Vp9PictureInfo vp9_info = picture_info.Convert(); + Vp9PictureInfo vp9_info = std::move(picture_info.Convert()); InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy); @@ -414,8 +377,7 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) Vp9FrameContainer frame{}; { gpu.SyncGuestHost(); - frame.info = GetVp9PictureInfo(state); - + frame.info = std::move(GetVp9PictureInfo(state)); frame.bit_stream.resize(frame.info.bitstream_size); gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(), frame.info.bitstream_size); @@ -423,37 +385,37 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) // Buffer two frames, saving the last show frame info if (!next_next_frame.bit_stream.empty()) { Vp9FrameContainer temp{ - .info = frame.info, - .bit_stream = frame.bit_stream, + .info = std::move(frame.info), + .bit_stream = std::move(frame.bit_stream), }; next_next_frame.info.show_frame = frame.info.last_frame_shown; - frame.info = next_next_frame.info; - frame.bit_stream = next_next_frame.bit_stream; + frame.info = std::move(next_next_frame.info); + frame.bit_stream = std::move(next_next_frame.bit_stream); next_next_frame = std::move(temp); if (!next_frame.bit_stream.empty()) { Vp9FrameContainer temp2{ - .info = frame.info, - .bit_stream = frame.bit_stream, + .info = std::move(frame.info), + .bit_stream = 
std::move(frame.bit_stream), }; next_frame.info.show_frame = frame.info.last_frame_shown; - frame.info = next_frame.info; - frame.bit_stream = next_frame.bit_stream; + frame.info = std::move(next_frame.info); + frame.bit_stream = std::move(next_frame.bit_stream); next_frame = std::move(temp2); } else { - next_frame.info = frame.info; - next_frame.bit_stream = frame.bit_stream; + next_frame.info = std::move(frame.info); + next_frame.bit_stream = std::move(frame.bit_stream); } } else { - next_next_frame.info = frame.info; - next_next_frame.bit_stream = frame.bit_stream; + next_next_frame.info = std::move(frame.info); + next_next_frame.bit_stream = std::move(frame.bit_stream); } return frame; } std::vector VP9::ComposeCompressedHeader() { VpxRangeEncoder writer{}; - + const bool update_probs = current_frame_info.show_frame && !current_frame_info.is_key_frame; if (!current_frame_info.lossless) { if (static_cast(current_frame_info.transform_mode) >= 3) { writer.Write(3, 2); @@ -471,7 +433,7 @@ std::vector VP9::ComposeCompressedHeader() { prev_frame_probs.tx_16x16_prob); WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_32x32_prob, prev_frame_probs.tx_32x32_prob); - if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + if (update_probs) { prev_frame_probs.tx_8x8_prob = current_frame_info.entropy.tx_8x8_prob; prev_frame_probs.tx_16x16_prob = current_frame_info.entropy.tx_16x16_prob; prev_frame_probs.tx_32x32_prob = current_frame_info.entropy.tx_32x32_prob; @@ -484,7 +446,7 @@ std::vector VP9::ComposeCompressedHeader() { WriteProbabilityUpdate(writer, current_frame_info.entropy.skip_probs, prev_frame_probs.skip_probs); - if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + if (update_probs) { prev_frame_probs.coef_probs = current_frame_info.entropy.coef_probs; prev_frame_probs.skip_probs = current_frame_info.entropy.skip_probs; } @@ -493,15 +455,12 @@ std::vector VP9::ComposeCompressedHeader() { // read_inter_probs() in 
the spec WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.inter_mode_prob, prev_frame_probs.inter_mode_prob); - if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { - prev_frame_probs.inter_mode_prob = current_frame_info.entropy.inter_mode_prob; - } if (current_frame_info.interp_filter == 4) { // read_interp_filter_probs() in the spec WriteProbabilityUpdate(writer, current_frame_info.entropy.switchable_interp_prob, prev_frame_probs.switchable_interp_prob); - if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + if (update_probs) { prev_frame_probs.switchable_interp_prob = current_frame_info.entropy.switchable_interp_prob; } @@ -510,9 +469,7 @@ std::vector VP9::ComposeCompressedHeader() { // read_is_inter_probs() in the spec WriteProbabilityUpdate(writer, current_frame_info.entropy.intra_inter_prob, prev_frame_probs.intra_inter_prob); - if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { - prev_frame_probs.intra_inter_prob = current_frame_info.entropy.intra_inter_prob; - } + // frame_reference_mode() in the spec if ((current_frame_info.ref_frame_sign_bias[1] & 1) != (current_frame_info.ref_frame_sign_bias[2] & 1) || @@ -530,7 +487,7 @@ std::vector VP9::ComposeCompressedHeader() { if (current_frame_info.reference_mode == 2) { WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_inter_prob, prev_frame_probs.comp_inter_prob); - if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + if (update_probs) { prev_frame_probs.comp_inter_prob = current_frame_info.entropy.comp_inter_prob; } } @@ -538,7 +495,7 @@ std::vector VP9::ComposeCompressedHeader() { if (current_frame_info.reference_mode != 1) { WriteProbabilityUpdate(writer, current_frame_info.entropy.single_ref_prob, prev_frame_probs.single_ref_prob); - if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + if (update_probs) { prev_frame_probs.single_ref_prob = 
current_frame_info.entropy.single_ref_prob; } } @@ -546,7 +503,7 @@ std::vector VP9::ComposeCompressedHeader() { if (current_frame_info.reference_mode != 0) { WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_ref_prob, prev_frame_probs.comp_ref_prob); - if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + if (update_probs) { prev_frame_probs.comp_ref_prob = current_frame_info.entropy.comp_ref_prob; } } @@ -557,42 +514,37 @@ std::vector VP9::ComposeCompressedHeader() { WriteProbabilityUpdate(writer, current_frame_info.entropy.y_mode_prob[index], prev_frame_probs.y_mode_prob[index]); } - if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { - prev_frame_probs.y_mode_prob = current_frame_info.entropy.y_mode_prob; - } + // read_partition_probs WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.partition_prob, prev_frame_probs.partition_prob); - if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { - prev_frame_probs.partition_prob = current_frame_info.entropy.partition_prob; - } // mv_probs for (s32 i = 0; i < 3; i++) { WriteMvProbabilityUpdate(writer, current_frame_info.entropy.joints[i], prev_frame_probs.joints[i]); } - if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + if (update_probs) { + prev_frame_probs.inter_mode_prob = current_frame_info.entropy.inter_mode_prob; + prev_frame_probs.intra_inter_prob = current_frame_info.entropy.intra_inter_prob; + prev_frame_probs.y_mode_prob = current_frame_info.entropy.y_mode_prob; + prev_frame_probs.partition_prob = current_frame_info.entropy.partition_prob; prev_frame_probs.joints = current_frame_info.entropy.joints; } for (s32 i = 0; i < 2; i++) { WriteMvProbabilityUpdate(writer, current_frame_info.entropy.sign[i], prev_frame_probs.sign[i]); - for (s32 j = 0; j < 10; j++) { const int index = i * 10 + j; - WriteMvProbabilityUpdate(writer, current_frame_info.entropy.classes[index], prev_frame_probs.classes[index]); 
} - WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0[i], prev_frame_probs.class_0[i]); for (s32 j = 0; j < 10; j++) { const int index = i * 10 + j; - WriteMvProbabilityUpdate(writer, current_frame_info.entropy.prob_bits[index], prev_frame_probs.prob_bits[index]); } @@ -602,7 +554,6 @@ std::vector VP9::ComposeCompressedHeader() { for (s32 j = 0; j < 2; j++) { for (s32 k = 0; k < 3; k++) { const int index = i * 2 * 3 + j * 3 + k; - WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_fr[index], prev_frame_probs.class_0_fr[index]); } @@ -610,7 +561,6 @@ std::vector VP9::ComposeCompressedHeader() { for (s32 j = 0; j < 3; j++) { const int index = i * 3 + j; - WriteMvProbabilityUpdate(writer, current_frame_info.entropy.fr[index], prev_frame_probs.fr[index]); } @@ -626,7 +576,7 @@ std::vector VP9::ComposeCompressedHeader() { } // save previous probs - if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + if (update_probs) { prev_frame_probs.sign = current_frame_info.entropy.sign; prev_frame_probs.classes = current_frame_info.entropy.classes; prev_frame_probs.class_0 = current_frame_info.entropy.class_0; @@ -637,7 +587,6 @@ std::vector VP9::ComposeCompressedHeader() { prev_frame_probs.high_precision = current_frame_info.entropy.high_precision; } } - writer.End(); return writer.GetBuffer(); } @@ -854,11 +803,11 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { return uncomp_writer; } -const std::vector& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) { +const std::vector& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state) { std::vector bitstream; { - Vp9FrameContainer curr_frame = GetCurrentFrame(state); - current_frame_info = curr_frame.info; + Vp9FrameContainer curr_frame = std::move(GetCurrentFrame(state)); + current_frame_info = std::move(curr_frame.info); bitstream = std::move(curr_frame.bit_stream); } diff --git a/src/video_core/command_classes/codecs/vp9.h 
b/src/video_core/command_classes/codecs/vp9.h index e2504512c..9ebbbf59e 100644 --- a/src/video_core/command_classes/codecs/vp9.h +++ b/src/video_core/command_classes/codecs/vp9.h @@ -119,7 +119,8 @@ public: /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec /// documentation - [[nodiscard]] const std::vector& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state); + [[nodiscard]] const std::vector& ComposeFrameHeader( + const NvdecCommon::NvdecRegisters& state); /// Returns true if the most recent frame was a hidden frame. [[nodiscard]] bool WasFrameHidden() const { @@ -147,8 +148,8 @@ private: /// Writes probability updates for the Coef probabilities void WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode, - const std::array& new_prob, - const std::array& old_prob); + const std::array& new_prob, + const std::array& old_prob); /// Write probabilities for 4-byte aligned structures template diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 4f0b05d22..5ca944f2a 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -31,62 +31,6 @@ enum FrameFlags : u32 { IntraOnly = 1 << 5, }; -enum class MvJointType { - MvJointZero = 0, /* Zero vector */ - MvJointHnzvz = 1, /* Vert zero, hor nonzero */ - MvJointHzvnz = 2, /* Hor zero, vert nonzero */ - MvJointHnzvnz = 3, /* Both components nonzero */ -}; -enum class MvClassType { - MvClass0 = 0, /* (0, 2] integer pel */ - MvClass1 = 1, /* (2, 4] integer pel */ - MvClass2 = 2, /* (4, 8] integer pel */ - MvClass3 = 3, /* (8, 16] integer pel */ - MvClass4 = 4, /* (16, 32] integer pel */ - MvClass5 = 5, /* (32, 64] integer pel */ - MvClass6 = 6, /* (64, 128] integer pel */ - MvClass7 = 7, /* (128, 256] integer pel */ - MvClass8 = 8, /* (256, 512] integer pel */ - MvClass9 = 9, /* (512, 1024] integer pel */ - MvClass10 = 10, /* (1024,2048] integer pel 
*/ -}; - -enum class BlockSize { - Block4x4 = 0, - Block4x8 = 1, - Block8x4 = 2, - Block8x8 = 3, - Block8x16 = 4, - Block16x8 = 5, - Block16x16 = 6, - Block16x32 = 7, - Block32x16 = 8, - Block32x32 = 9, - Block32x64 = 10, - Block64x32 = 11, - Block64x64 = 12, - BlockSizes = 13, - BlockInvalid = BlockSizes -}; - -enum class PredictionMode { - DcPred = 0, // Average of above and left pixels - VPred = 1, // Vertical - HPred = 2, // Horizontal - D45Pred = 3, // Directional 45 deg = round(arctan(1 / 1) * 180 / pi) - D135Pred = 4, // Directional 135 deg = 180 - 45 - D117Pred = 5, // Directional 117 deg = 180 - 63 - D153Pred = 6, // Directional 153 deg = 180 - 27 - D207Pred = 7, // Directional 207 deg = 180 + 27 - D63Pred = 8, // Directional 63 deg = round(arctan(2 / 1) * 180 / pi) - TmPred = 9, // True-motion - NearestMv = 10, - NearMv = 11, - ZeroMv = 12, - NewMv = 13, - MbModeCount = 14 -}; - enum class TxSize { Tx4x4 = 0, // 4x4 transform Tx8x8 = 1, // 8x8 transform @@ -104,13 +48,6 @@ enum class TxMode { TxModes = 5 }; -enum class reference_mode { - SingleReference = 0, - CompoundReference = 1, - ReferenceModeSelect = 2, - ReferenceModes = 3 -}; - struct Segmentation { u8 enabled{}; u8 update_map{}; @@ -131,7 +68,7 @@ static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); struct Vp9EntropyProbs { std::array y_mode_prob{}; std::array partition_prob{}; - std::array coef_probs{}; + std::array coef_probs{}; std::array switchable_interp_prob{}; std::array inter_mode_prob{}; std::array intra_inter_prob{}; @@ -152,7 +89,7 @@ struct Vp9EntropyProbs { std::array class_0_hp{}; std::array high_precision{}; }; -static_assert(sizeof(Vp9EntropyProbs) == 0x9F4, "Vp9EntropyProbs is an invalid size"); +static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); struct Vp9PictureInfo { bool is_key_frame{}; @@ -278,44 +215,41 @@ static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); struct EntropyProbs { 
INSERT_PADDING_BYTES(1024); - std::array, 7> inter_mode_prob{}; + std::array inter_mode_prob{}; std::array intra_inter_prob{}; INSERT_PADDING_BYTES(80); - std::array, 2> tx_8x8_prob{}; - std::array, 2> tx_16x16_prob{}; - std::array, 2> tx_32x32_prob{}; + std::array tx_8x8_prob{}; + std::array tx_16x16_prob{}; + std::array tx_32x32_prob{}; std::array y_mode_prob_e8{}; std::array, 4> y_mode_prob_e0e7{}; INSERT_PADDING_BYTES(64); - std::array, 16> partition_prob{}; + std::array partition_prob{}; INSERT_PADDING_BYTES(10); - std::array, 4> switchable_interp_prob{}; + std::array switchable_interp_prob{}; std::array comp_inter_prob{}; - std::array skip_probs{}; + std::array skip_probs{}; + INSERT_PADDING_BYTES(1); std::array joints{}; std::array sign{}; - std::array, 2> class_0{}; - std::array, 2> fr{}; + std::array class_0{}; + std::array fr{}; std::array class_0_hp{}; std::array high_precision{}; - std::array, 2> classes{}; - std::array, 2>, 2> class_0_fr{}; - std::array, 2> pred_bits{}; - std::array, 5> single_ref_prob{}; + std::array classes{}; + std::array class_0_fr{}; + std::array pred_bits{}; + std::array single_ref_prob{}; std::array comp_ref_prob{}; INSERT_PADDING_BYTES(17); - std::array, 6>, 6>, 2>, 2>, 4> - coef_probs{}; + std::array coef_probs{}; void Convert(Vp9EntropyProbs& fc) { - std::memcpy(fc.inter_mode_prob.data(), inter_mode_prob.data(), fc.inter_mode_prob.size()); - - std::memcpy(fc.intra_inter_prob.data(), intra_inter_prob.data(), - fc.intra_inter_prob.size()); - - std::memcpy(fc.tx_8x8_prob.data(), tx_8x8_prob.data(), fc.tx_8x8_prob.size()); - std::memcpy(fc.tx_16x16_prob.data(), tx_16x16_prob.data(), fc.tx_16x16_prob.size()); - std::memcpy(fc.tx_32x32_prob.data(), tx_32x32_prob.data(), fc.tx_32x32_prob.size()); + fc.inter_mode_prob = std::move(inter_mode_prob); + fc.intra_inter_prob = std::move(intra_inter_prob); + fc.tx_8x8_prob = std::move(tx_8x8_prob); + fc.tx_16x16_prob = std::move(tx_16x16_prob); + fc.tx_32x32_prob = std::move(tx_32x32_prob); 
for (s32 i = 0; i < 4; i++) { for (s32 j = 0; j < 9; j++) { @@ -323,27 +257,29 @@ struct EntropyProbs { } } - std::memcpy(fc.partition_prob.data(), partition_prob.data(), fc.partition_prob.size()); - - std::memcpy(fc.switchable_interp_prob.data(), switchable_interp_prob.data(), - fc.switchable_interp_prob.size()); - std::memcpy(fc.comp_inter_prob.data(), comp_inter_prob.data(), fc.comp_inter_prob.size()); - std::memcpy(fc.skip_probs.data(), skip_probs.data(), fc.skip_probs.size()); - - std::memcpy(fc.joints.data(), joints.data(), fc.joints.size()); - - std::memcpy(fc.sign.data(), sign.data(), fc.sign.size()); - std::memcpy(fc.class_0.data(), class_0.data(), fc.class_0.size()); - std::memcpy(fc.fr.data(), fr.data(), fc.fr.size()); - std::memcpy(fc.class_0_hp.data(), class_0_hp.data(), fc.class_0_hp.size()); - std::memcpy(fc.high_precision.data(), high_precision.data(), fc.high_precision.size()); - std::memcpy(fc.classes.data(), classes.data(), fc.classes.size()); - std::memcpy(fc.class_0_fr.data(), class_0_fr.data(), fc.class_0_fr.size()); - std::memcpy(fc.prob_bits.data(), pred_bits.data(), fc.prob_bits.size()); - std::memcpy(fc.single_ref_prob.data(), single_ref_prob.data(), fc.single_ref_prob.size()); - std::memcpy(fc.comp_ref_prob.data(), comp_ref_prob.data(), fc.comp_ref_prob.size()); - - std::memcpy(fc.coef_probs.data(), coef_probs.data(), fc.coef_probs.size()); + fc.partition_prob = std::move(partition_prob); + fc.switchable_interp_prob = std::move(switchable_interp_prob); + fc.comp_inter_prob = std::move(comp_inter_prob); + fc.skip_probs = std::move(skip_probs); + fc.joints = std::move(joints); + fc.sign = std::move(sign); + fc.class_0 = std::move(class_0); + fc.fr = std::move(fr); + fc.class_0_hp = std::move(class_0_hp); + fc.high_precision = std::move(high_precision); + fc.classes = std::move(classes); + fc.class_0_fr = std::move(class_0_fr); + fc.prob_bits = std::move(pred_bits); + fc.single_ref_prob = std::move(single_ref_prob); + fc.comp_ref_prob = 
std::move(comp_ref_prob); + + // Skip the 4th element as it goes unused + for (std::size_t i = 0; i < coef_probs.size(); i += 4) { + const std::size_t j = i - i / 4; + fc.coef_probs[j] = coef_probs[i]; + fc.coef_probs[j + 1] = coef_probs[i + 1]; + fc.coef_probs[j + 2] = coef_probs[i + 2]; + } } }; static_assert(sizeof(EntropyProbs) == 0xEA0, "EntropyProbs is an invalid size"); diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp index 8ca7a7b06..79e1f4e13 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ b/src/video_core/command_classes/nvdec.cpp @@ -29,11 +29,7 @@ void Nvdec::ProcessMethod(Method method, const std::vector& arguments) { } } -AVFrame* Nvdec::GetFrame() { - return codec->GetCurrentFrame(); -} - -const AVFrame* Nvdec::GetFrame() const { +AVFramePtr Nvdec::GetFrame() { return codec->GetCurrentFrame(); } diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h index eec4443f9..e4877c533 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/command_classes/nvdec.h @@ -26,8 +26,7 @@ public: void ProcessMethod(Method method, const std::vector& arguments); /// Return most recently decoded frame - [[nodiscard]] AVFrame* GetFrame(); - [[nodiscard]] const AVFrame* GetFrame() const; + [[nodiscard]] AVFramePtr GetFrame(); private: /// Invoke codec to decode a frame diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 5b52da277..248443027 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -58,17 +58,18 @@ void Vic::Execute() { return; } const VicConfig config{gpu.MemoryManager().Read(config_struct_address + 0x20)}; + const AVFramePtr frame_ptr = std::move(nvdec_processor->GetFrame()); + const auto* frame = frame_ptr.get(); + if (!frame || frame->width == 0 || frame->height == 0) { + return; + } const VideoPixelFormat pixel_format = 
static_cast(config.pixel_format.Value()); switch (pixel_format) { case VideoPixelFormat::BGRA8: case VideoPixelFormat::RGBA8: { LOG_TRACE(Service_NVDRV, "Writing RGB Frame"); - const auto* frame = nvdec_processor->GetFrame(); - if (!frame || frame->width == 0 || frame->height == 0) { - return; - } if (scaler_ctx == nullptr || frame->width != scaler_width || frame->height != scaler_height) { const AVPixelFormat target_format = @@ -121,12 +122,6 @@ void Vic::Execute() { case VideoPixelFormat::Yuv420: { LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame"); - const auto* frame = nvdec_processor->GetFrame(); - - if (!frame || frame->width == 0 || frame->height == 0) { - return; - } - const std::size_t surface_width = config.surface_width_minus1 + 1; const std::size_t surface_height = config.surface_height_minus1 + 1; const std::size_t half_width = surface_width / 2; -- cgit v1.2.3 From c9e3abe2060760d71c83a1574559b6e479e637d2 Mon Sep 17 00:00:00 2001 From: ameerj Date: Thu, 26 Nov 2020 00:18:26 -0500 Subject: Address PR feedback remove some redundant moves, make deleter match naming guidelines. 
Co-Authored-By: LC <712067+lioncash@users.noreply.github.com> --- src/video_core/command_classes/codecs/codec.cpp | 15 ++++---- src/video_core/command_classes/codecs/codec.h | 4 +-- src/video_core/command_classes/codecs/vp9.cpp | 2 +- src/video_core/command_classes/codecs/vp9_types.h | 44 +++++++++++------------ 4 files changed, 33 insertions(+), 32 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 1a19341c8..412e1e41c 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -18,7 +18,7 @@ extern "C" { namespace Tegra { -void av_frame_deleter(AVFrame* ptr) { +void AVFrameDeleter(AVFrame* ptr) { av_frame_unref(ptr); av_free(ptr); } @@ -101,7 +101,7 @@ void Codec::Decode() { if (!vp9_hidden_frame) { // Only receive/store visible frames - AVFramePtr frame = AVFramePtr{av_frame_alloc(), av_frame_deleter}; + AVFramePtr frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; avcodec_receive_frame(av_codec_ctx, frame.get()); av_frames.push(std::move(frame)); } @@ -110,12 +110,13 @@ void Codec::Decode() { AVFramePtr Codec::GetCurrentFrame() { // Sometimes VIC will request more frames than have been decoded. 
// in this case, return a nullptr and don't overwrite previous frame data - if (av_frames.size() > 0) { - AVFramePtr frame = std::move(av_frames.front()); - av_frames.pop(); - return frame; + if (av_frames.empty()) { + return AVFramePtr{nullptr, AVFrameDeleter}; } - return AVFramePtr{nullptr, av_frame_deleter}; + + AVFramePtr frame = std::move(av_frames.front()); + av_frames.pop(); + return frame; } NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index c26b59fde..0c6dde405 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -23,8 +23,8 @@ namespace Tegra { class GPU; struct VicRegisters; -void av_frame_deleter(AVFrame* ptr); -using AVFramePtr = std::unique_ptr; +void AVFrameDeleter(AVFrame* ptr); +using AVFramePtr = std::unique_ptr; namespace Decoder { class H264; diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 31e00c27d..b1d675cdb 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -306,7 +306,7 @@ void VP9::WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode, const std::array& old_prob) { constexpr u32 block_bytes = 2 * 2 * 6 * 6 * 3; - const auto needs_update = [&](u32 base_index) -> bool { + const auto needs_update = [&](u32 base_index) { return !std::equal(new_prob.begin() + base_index, new_prob.begin() + base_index + block_bytes, old_prob.begin() + base_index); diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 5ca944f2a..139501a1c 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -245,33 +245,33 @@ struct EntropyProbs { std::array coef_probs{}; void Convert(Vp9EntropyProbs& fc) { - fc.inter_mode_prob = 
std::move(inter_mode_prob); - fc.intra_inter_prob = std::move(intra_inter_prob); - fc.tx_8x8_prob = std::move(tx_8x8_prob); - fc.tx_16x16_prob = std::move(tx_16x16_prob); - fc.tx_32x32_prob = std::move(tx_32x32_prob); + fc.inter_mode_prob = inter_mode_prob; + fc.intra_inter_prob = intra_inter_prob; + fc.tx_8x8_prob = tx_8x8_prob; + fc.tx_16x16_prob = tx_16x16_prob; + fc.tx_32x32_prob = tx_32x32_prob; - for (s32 i = 0; i < 4; i++) { - for (s32 j = 0; j < 9; j++) { + for (std::size_t i = 0; i < 4; i++) { + for (std::size_t j = 0; j < 9; j++) { fc.y_mode_prob[j + 9 * i] = j < 8 ? y_mode_prob_e0e7[i][j] : y_mode_prob_e8[i]; } } - fc.partition_prob = std::move(partition_prob); - fc.switchable_interp_prob = std::move(switchable_interp_prob); - fc.comp_inter_prob = std::move(comp_inter_prob); - fc.skip_probs = std::move(skip_probs); - fc.joints = std::move(joints); - fc.sign = std::move(sign); - fc.class_0 = std::move(class_0); - fc.fr = std::move(fr); - fc.class_0_hp = std::move(class_0_hp); - fc.high_precision = std::move(high_precision); - fc.classes = std::move(classes); - fc.class_0_fr = std::move(class_0_fr); - fc.prob_bits = std::move(pred_bits); - fc.single_ref_prob = std::move(single_ref_prob); - fc.comp_ref_prob = std::move(comp_ref_prob); + fc.partition_prob = partition_prob; + fc.switchable_interp_prob = switchable_interp_prob; + fc.comp_inter_prob = comp_inter_prob; + fc.skip_probs = skip_probs; + fc.joints = joints; + fc.sign = sign; + fc.class_0 = class_0; + fc.fr = fr; + fc.class_0_hp = class_0_hp; + fc.high_precision = high_precision; + fc.classes = classes; + fc.class_0_fr = class_0_fr; + fc.prob_bits = pred_bits; + fc.single_ref_prob = single_ref_prob; + fc.comp_ref_prob = comp_ref_prob; // Skip the 4th element as it goes unused for (std::size_t i = 0; i < coef_probs.size(); i += 4) { -- cgit v1.2.3 From 979b60273889f070737d1fe3037991245180ca67 Mon Sep 17 00:00:00 2001 From: ameerj Date: Thu, 26 Nov 2020 14:04:06 -0500 Subject: Limit queue size to 10 
frames Workaround for ZLA, which seems to decode and queue twice as many frames as it displays. --- src/video_core/command_classes/codecs/codec.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 412e1e41c..9a88f64e4 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -104,6 +104,10 @@ void Codec::Decode() { AVFramePtr frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; avcodec_receive_frame(av_codec_ctx, frame.get()); av_frames.push(std::move(frame)); + // Limit queue to 10 frames. Workaround for ZLA decode and queue spam + if (av_frames.size() > 10) { + av_frames.pop(); + } } } -- cgit v1.2.3 From 2ccf85a9103afbb4dc227e481bb0e3a7360e833b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 26 Nov 2020 16:49:20 -0300 Subject: vk_shader_decompiler: Implement force early fragment tests Force early fragment tests when the 3D method is enabled. The established pipeline cache takes care of recompiling if needed. This is implemented only on Vulkan to avoid invalidating the shader cache on OpenGL. 
--- src/video_core/engines/maxwell_3d.h | 7 ++++++- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 7 ++++--- src/video_core/renderer_vulkan/fixed_pipeline_state.h | 8 ++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 6 +++--- src/video_core/renderer_vulkan/vk_shader_decompiler.h | 1 + 6 files changed, 19 insertions(+), 11 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1cbe8fe67..b0d9559d0 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -755,7 +755,11 @@ public: u32 data_upload; - INSERT_UNION_PADDING_WORDS(0x44); + INSERT_UNION_PADDING_WORDS(0x16); + + u32 force_early_fragment_tests; + + INSERT_UNION_PADDING_WORDS(0x2D); struct { union { @@ -1572,6 +1576,7 @@ ASSERT_REG_POSITION(shadow_ram_control, 0x49); ASSERT_REG_POSITION(upload, 0x60); ASSERT_REG_POSITION(exec_upload, 0x6C); ASSERT_REG_POSITION(data_upload, 0x6D); +ASSERT_REG_POSITION(force_early_fragment_tests, 0x84); ASSERT_REG_POSITION(sync_info, 0xB2); ASSERT_REG_POSITION(tess_mode, 0xC8); ASSERT_REG_POSITION(tess_level_outer, 0xC9); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index fffae528e..5ec43db11 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -46,7 +46,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta regs.polygon_offset_fill_enable}; const u32 topology_index = static_cast(regs.draw.topology.Value()); - raw = 0; + raw1 = 0; primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 
1 : 0); depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); @@ -61,12 +61,13 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); topology.Assign(regs.draw.topology); - alpha_raw = 0; + raw2 = 0; const auto test_func = regs.alpha_test_enabled == 1 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); - alpha_test_ref = Common::BitCast(regs.alpha_test_ref); + early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 42480e8d0..c26b77790 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -171,7 +171,7 @@ struct FixedPipelineState { }; union { - u32 raw; + u32 raw1; BitField<0, 1, u32> no_extended_dynamic_state; BitField<2, 1, u32> primitive_restart_enable; BitField<3, 1, u32> depth_bias_enable; @@ -187,13 +187,13 @@ struct FixedPipelineState { BitField<23, 1, u32> rasterize_enable; BitField<24, 4, Maxwell::PrimitiveTopology> topology; }; - - u32 alpha_test_ref; ///< Alpha test reference value union { - u32 alpha_raw; + u32 raw2; BitField<0, 3, u32> alpha_test_func; + BitField<3, 1, u32> early_z; }; + u32 alpha_test_ref; u32 point_size; std::array binding_divisors; std::array attributes; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f9efe526d..df7e8c864 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -344,6 +344,7 @@ 
VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { specialization.attribute_types[i] = attribute.Type(); } specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; + specialization.early_fragment_tests = fixed_state.early_z; // Alpha test specialization.alpha_test_func = diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 1c52f40bb..fed9ebecd 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -315,7 +315,6 @@ public: "supported on this device"); } } - if (ir.UsesLayer() || ir.UsesViewportIndex()) { if (ir.UsesViewportIndex()) { AddCapability(spv::Capability::MultiViewport); @@ -325,11 +324,9 @@ public: AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); } } - if (device.IsFormatlessImageLoadSupported()) { AddCapability(spv::Capability::StorageImageReadWithoutFormat); } - if (device.IsFloat16Supported()) { AddCapability(spv::Capability::Float16); } @@ -377,6 +374,9 @@ public: if (header.ps.omap.depth) { AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } + if (specialization.early_fragment_tests) { + AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); + } break; case ShaderType::Compute: const auto workgroup_size = specialization.workgroup_size; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index cd3d0a415..110848922 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -95,6 +95,7 @@ struct Specialization final { std::bitset enabled_attributes; std::array attribute_types{}; bool ndc_minus_one_to_one{}; + bool early_fragment_tests{}; float alpha_test_ref{}; Maxwell::ComparisonOp alpha_test_func{}; }; -- cgit v1.2.3 From 4681e1ea9ea749646488d05d45327f57c4b321f2 Mon Sep 17 00:00:00 2001 
From: comex Date: Sat, 14 Nov 2020 18:35:34 -0500 Subject: codec: Fix `pragma GCC diagnostic pop` missing corresponding push --- src/video_core/command_classes/codecs/codec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 5bbe6a332..ee5d62540 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -10,6 +10,7 @@ extern "C" { #if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" #endif #include -- cgit v1.2.3 From cf9767c608dfb49b77708966d8d07354930d150c Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 3 Dec 2020 12:33:05 -0500 Subject: vp9/vic: Resolve pessimizing moves Removes the usage of moves that don't result in behavior different from a copy, or otherwise would prevent copy elision from occurring. --- src/video_core/command_classes/codecs/vp9.cpp | 20 ++++++++++---------- src/video_core/command_classes/vic.cpp | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index b1d675cdb..7d8d6ee3c 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -355,7 +355,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { PictureInfo picture_info{}; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); - Vp9PictureInfo vp9_info = std::move(picture_info.Convert()); + Vp9PictureInfo vp9_info = picture_info.Convert(); InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy); @@ -377,7 +377,7 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& 
state) Vp9FrameContainer frame{}; { gpu.SyncGuestHost(); - frame.info = std::move(GetVp9PictureInfo(state)); + frame.info = GetVp9PictureInfo(state); frame.bit_stream.resize(frame.info.bitstream_size); gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(), frame.info.bitstream_size); @@ -385,29 +385,29 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) // Buffer two frames, saving the last show frame info if (!next_next_frame.bit_stream.empty()) { Vp9FrameContainer temp{ - .info = std::move(frame.info), + .info = frame.info, .bit_stream = std::move(frame.bit_stream), }; next_next_frame.info.show_frame = frame.info.last_frame_shown; - frame.info = std::move(next_next_frame.info); + frame.info = next_next_frame.info; frame.bit_stream = std::move(next_next_frame.bit_stream); next_next_frame = std::move(temp); if (!next_frame.bit_stream.empty()) { Vp9FrameContainer temp2{ - .info = std::move(frame.info), + .info = frame.info, .bit_stream = std::move(frame.bit_stream), }; next_frame.info.show_frame = frame.info.last_frame_shown; - frame.info = std::move(next_frame.info); + frame.info = next_frame.info; frame.bit_stream = std::move(next_frame.bit_stream); next_frame = std::move(temp2); } else { - next_frame.info = std::move(frame.info); + next_frame.info = frame.info; next_frame.bit_stream = std::move(frame.bit_stream); } } else { - next_next_frame.info = std::move(frame.info); + next_next_frame.info = frame.info; next_next_frame.bit_stream = std::move(frame.bit_stream); } return frame; @@ -806,8 +806,8 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { const std::vector& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state) { std::vector bitstream; { - Vp9FrameContainer curr_frame = std::move(GetCurrentFrame(state)); - current_frame_info = std::move(curr_frame.info); + Vp9FrameContainer curr_frame = GetCurrentFrame(state); + current_frame_info = curr_frame.info; bitstream = 
std::move(curr_frame.bit_stream); } diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 248443027..6cfc193fa 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -58,7 +58,7 @@ void Vic::Execute() { return; } const VicConfig config{gpu.MemoryManager().Read(config_struct_address + 0x20)}; - const AVFramePtr frame_ptr = std::move(nvdec_processor->GetFrame()); + const AVFramePtr frame_ptr = nvdec_processor->GetFrame(); const auto* frame = frame_ptr.get(); if (!frame || frame->width == 0 || frame->height == 0) { return; -- cgit v1.2.3 From 7cf34c3637b85feeba2cf1ac67fc3baf9803804b Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 3 Dec 2020 15:59:38 -0500 Subject: node: Eliminate variable shadowing --- src/video_core/shader/node.h | 96 ++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 47 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 8f230d57a..8b081030f 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -284,24 +284,26 @@ using TrackSampler = std::shared_ptr; struct Sampler { /// Bound samplers constructor - constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) - : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_buffer{is_buffer}, is_indexed{is_indexed} {} + constexpr explicit Sampler(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, + bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) + : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, + is_buffer{is_buffer_}, is_indexed{is_indexed_} {} /// Separate sampler constructor - constexpr explicit Sampler(u32 index, std::pair offsets, std::pair buffers, - Tegra::Shader::TextureType type, bool is_array, 
bool is_shadow, - bool is_buffer) - : index{index}, offset{offsets.first}, secondary_offset{offsets.second}, - buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array}, - is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {} + constexpr explicit Sampler(u32 index_, std::pair offsets, std::pair buffers, + Tegra::Shader::TextureType type, bool is_array_, bool is_shadow_, + bool is_buffer_) + : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, + buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array_}, + is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} /// Bindless samplers constructor - constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) - : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, - is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} + constexpr explicit Sampler(u32 index_, u32 offset_, u32 buffer_, + Tegra::Shader::TextureType type, bool is_array_, bool is_shadow_, + bool is_buffer_, bool is_indexed_) + : index{index_}, offset{offset_}, buffer{buffer_}, type{type}, is_array{is_array_}, + is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { + } u32 index = 0; ///< Emulated index given for the this sampler. u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. 
@@ -341,12 +343,12 @@ struct BindlessSamplerNode { struct Image { public: /// Bound images constructor - constexpr explicit Image(u32 index, u32 offset, Tegra::Shader::ImageType type) - : index{index}, offset{offset}, type{type} {} + constexpr explicit Image(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) + : index{index_}, offset{offset_}, type{type_} {} /// Bindless samplers constructor - constexpr explicit Image(u32 index, u32 offset, u32 buffer, Tegra::Shader::ImageType type) - : index{index}, offset{offset}, buffer{buffer}, type{type}, is_bindless{true} {} + constexpr explicit Image(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) + : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} void MarkWrite() { is_written = true; @@ -437,20 +439,20 @@ private: /// Holds any kind of operation that can be done in the IR class OperationNode final : public AmendNode { public: - explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} + explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {} - explicit OperationNode(OperationCode code, Meta meta) - : OperationNode(code, std::move(meta), std::vector{}) {} + explicit OperationNode(OperationCode code_, Meta meta_) + : OperationNode(code_, std::move(meta_), std::vector{}) {} - explicit OperationNode(OperationCode code, std::vector operands) - : OperationNode(code, Meta{}, std::move(operands)) {} + explicit OperationNode(OperationCode code_, std::vector operands_) + : OperationNode(code_, Meta{}, std::move(operands_)) {} - explicit OperationNode(OperationCode code, Meta meta, std::vector operands) - : code{code}, meta{std::move(meta)}, operands{std::move(operands)} {} + explicit OperationNode(OperationCode code_, Meta meta_, std::vector operands_) + : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {} template - explicit OperationNode(OperationCode code, Meta meta, Args&&... 
operands) - : code{code}, meta{std::move(meta)}, operands{operands...} {} + explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_) + : code{code_}, meta{std::move(meta_)}, operands{operands_...} {} OperationCode GetCode() const { return code; @@ -477,8 +479,8 @@ private: /// Encloses inside any kind of node that returns a boolean conditionally-executed code class ConditionalNode final : public AmendNode { public: - explicit ConditionalNode(Node condition, std::vector&& code) - : condition{std::move(condition)}, code{std::move(code)} {} + explicit ConditionalNode(Node condition_, std::vector&& code_) + : condition{std::move(condition_)}, code{std::move(code_)} {} const Node& GetCondition() const { return condition; @@ -496,7 +498,7 @@ private: /// A general purpose register class GprNode final { public: - explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {} + explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {} u32 GetIndex() const { return static_cast(index); @@ -509,7 +511,7 @@ private: /// A custom variable class CustomVarNode final { public: - explicit constexpr CustomVarNode(u32 index) : index{index} {} + explicit constexpr CustomVarNode(u32 index_) : index{index_} {} constexpr u32 GetIndex() const { return index; @@ -522,7 +524,7 @@ private: /// A 32-bits value that represents an immediate value class ImmediateNode final { public: - explicit constexpr ImmediateNode(u32 value) : value{value} {} + explicit constexpr ImmediateNode(u32 value_) : value{value_} {} u32 GetValue() const { return value; @@ -535,7 +537,7 @@ private: /// One of Maxwell's internal flags class InternalFlagNode final { public: - explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {} + explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {} InternalFlag GetFlag() const { return flag; @@ -548,8 +550,8 @@ private: /// A predicate register, it can be negated without additional nodes class 
PredicateNode final { public: - explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated) - : index{index}, negated{negated} {} + explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_) + : index{index_}, negated{negated_} {} Tegra::Shader::Pred GetIndex() const { return index; @@ -568,12 +570,12 @@ private: class AbufNode final { public: // Initialize for standard attributes (index is explicit). - explicit AbufNode(Tegra::Shader::Attribute::Index index, u32 element, Node buffer = {}) - : buffer{std::move(buffer)}, index{index}, element{element} {} + explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {}) + : buffer{std::move(buffer_)}, index{index_}, element{element_} {} // Initialize for physical attributes (index is a variable value). - explicit AbufNode(Node physical_address, Node buffer = {}) - : physical_address{std::move(physical_address)}, buffer{std::move(buffer)} {} + explicit AbufNode(Node physical_address_, Node buffer_ = {}) + : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {} Tegra::Shader::Attribute::Index GetIndex() const { return index; @@ -605,7 +607,7 @@ private: /// Patch memory (used to communicate tessellation stages). 
class PatchNode final { public: - explicit PatchNode(u32 offset) : offset{offset} {} + explicit PatchNode(u32 offset_) : offset{offset_} {} u32 GetOffset() const { return offset; @@ -618,7 +620,7 @@ private: /// Constant buffer node, usually mapped to uniform buffers in GLSL class CbufNode final { public: - explicit CbufNode(u32 index, Node offset) : index{index}, offset{std::move(offset)} {} + explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {} u32 GetIndex() const { return index; @@ -636,7 +638,7 @@ private: /// Local memory node class LmemNode final { public: - explicit LmemNode(Node address) : address{std::move(address)} {} + explicit LmemNode(Node address_) : address{std::move(address_)} {} const Node& GetAddress() const { return address; @@ -649,7 +651,7 @@ private: /// Shared memory node class SmemNode final { public: - explicit SmemNode(Node address) : address{std::move(address)} {} + explicit SmemNode(Node address_) : address{std::move(address_)} {} const Node& GetAddress() const { return address; @@ -662,9 +664,9 @@ private: /// Global memory node class GmemNode final { public: - explicit GmemNode(Node real_address, Node base_address, const GlobalMemoryBase& descriptor) - : real_address{std::move(real_address)}, base_address{std::move(base_address)}, - descriptor{descriptor} {} + explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_) + : real_address{std::move(real_address_)}, base_address{std::move(base_address_)}, + descriptor{descriptor_} {} const Node& GetRealAddress() const { return real_address; @@ -687,7 +689,7 @@ private: /// Commentary, can be dropped class CommentNode final { public: - explicit CommentNode(std::string text) : text{std::move(text)} {} + explicit CommentNode(std::string text_) : text{std::move(text_)} {} const std::string& GetText() const { return text; -- cgit v1.2.3 From edd8208779051dfd62382f9ed1f896a43f1b3f7c Mon Sep 17 00:00:00 2001 From: Lioncash 
Date: Thu, 3 Dec 2020 16:03:31 -0500 Subject: node: Mark member functions as [[nodiscard]] where applicable Prevents logic bugs from accidentally ignoring the return value. --- src/video_core/shader/node.h | 58 ++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 8b081030f..a1e2c4d8e 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -379,7 +379,7 @@ struct GlobalMemoryBase { u32 cbuf_index = 0; u32 cbuf_offset = 0; - bool operator<(const GlobalMemoryBase& rhs) const { + [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const { return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); } }; @@ -416,7 +416,7 @@ using Meta = class AmendNode { public: - std::optional GetAmendIndex() const { + [[nodiscard]] std::optional GetAmendIndex() const { if (amend_index == amend_null_index) { return std::nullopt; } @@ -454,19 +454,19 @@ public: explicit OperationNode(OperationCode code_, Meta meta_, Args&&... 
operands_) : code{code_}, meta{std::move(meta_)}, operands{operands_...} {} - OperationCode GetCode() const { + [[nodiscard]] OperationCode GetCode() const { return code; } - const Meta& GetMeta() const { + [[nodiscard]] const Meta& GetMeta() const { return meta; } - std::size_t GetOperandsCount() const { + [[nodiscard]] std::size_t GetOperandsCount() const { return operands.size(); } - const Node& operator[](std::size_t operand_index) const { + [[nodiscard]] const Node& operator[](std::size_t operand_index) const { return operands.at(operand_index); } @@ -482,11 +482,11 @@ public: explicit ConditionalNode(Node condition_, std::vector&& code_) : condition{std::move(condition_)}, code{std::move(code_)} {} - const Node& GetCondition() const { + [[nodiscard]] const Node& GetCondition() const { return condition; } - const std::vector& GetCode() const { + [[nodiscard]] const std::vector& GetCode() const { return code; } @@ -500,7 +500,7 @@ class GprNode final { public: explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {} - u32 GetIndex() const { + [[nodiscard]] constexpr u32 GetIndex() const { return static_cast(index); } @@ -513,7 +513,7 @@ class CustomVarNode final { public: explicit constexpr CustomVarNode(u32 index_) : index{index_} {} - constexpr u32 GetIndex() const { + [[nodiscard]] constexpr u32 GetIndex() const { return index; } @@ -526,7 +526,7 @@ class ImmediateNode final { public: explicit constexpr ImmediateNode(u32 value_) : value{value_} {} - u32 GetValue() const { + [[nodiscard]] constexpr u32 GetValue() const { return value; } @@ -539,7 +539,7 @@ class InternalFlagNode final { public: explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {} - InternalFlag GetFlag() const { + [[nodiscard]] constexpr InternalFlag GetFlag() const { return flag; } @@ -553,11 +553,11 @@ public: explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_) : index{index_}, negated{negated_} {} - Tegra::Shader::Pred 
GetIndex() const { + [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const { return index; } - bool IsNegated() const { + [[nodiscard]] constexpr bool IsNegated() const { return negated; } @@ -577,23 +577,23 @@ public: explicit AbufNode(Node physical_address_, Node buffer_ = {}) : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {} - Tegra::Shader::Attribute::Index GetIndex() const { + [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const { return index; } - u32 GetElement() const { + [[nodiscard]] u32 GetElement() const { return element; } - const Node& GetBuffer() const { + [[nodiscard]] const Node& GetBuffer() const { return buffer; } - bool IsPhysicalBuffer() const { + [[nodiscard]] bool IsPhysicalBuffer() const { return static_cast(physical_address); } - const Node& GetPhysicalAddress() const { + [[nodiscard]] const Node& GetPhysicalAddress() const { return physical_address; } @@ -607,9 +607,9 @@ private: /// Patch memory (used to communicate tessellation stages). 
class PatchNode final { public: - explicit PatchNode(u32 offset_) : offset{offset_} {} + explicit constexpr PatchNode(u32 offset_) : offset{offset_} {} - u32 GetOffset() const { + [[nodiscard]] constexpr u32 GetOffset() const { return offset; } @@ -622,11 +622,11 @@ class CbufNode final { public: explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {} - u32 GetIndex() const { + [[nodiscard]] u32 GetIndex() const { return index; } - const Node& GetOffset() const { + [[nodiscard]] const Node& GetOffset() const { return offset; } @@ -640,7 +640,7 @@ class LmemNode final { public: explicit LmemNode(Node address_) : address{std::move(address_)} {} - const Node& GetAddress() const { + [[nodiscard]] const Node& GetAddress() const { return address; } @@ -653,7 +653,7 @@ class SmemNode final { public: explicit SmemNode(Node address_) : address{std::move(address_)} {} - const Node& GetAddress() const { + [[nodiscard]] const Node& GetAddress() const { return address; } @@ -668,15 +668,15 @@ public: : real_address{std::move(real_address_)}, base_address{std::move(base_address_)}, descriptor{descriptor_} {} - const Node& GetRealAddress() const { + [[nodiscard]] const Node& GetRealAddress() const { return real_address; } - const Node& GetBaseAddress() const { + [[nodiscard]] const Node& GetBaseAddress() const { return base_address; } - const GlobalMemoryBase& GetDescriptor() const { + [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const { return descriptor; } @@ -691,7 +691,7 @@ class CommentNode final { public: explicit CommentNode(std::string text_) : text{std::move(text_)} {} - const std::string& GetText() const { + [[nodiscard]] const std::string& GetText() const { return text; } -- cgit v1.2.3 From 677a8b208d47d0d2397197ce74c7039a8ea79d20 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 4 Dec 2020 14:39:12 -0500 Subject: video_core: Resolve more variable shadowing scenarios Resolves variable shadowing scenarios up to the end of 
the OpenGL code to make it nicer to review. The rest will be resolved in a following commit. --- src/video_core/cdma_pusher.cpp | 30 +++++++------- src/video_core/cdma_pusher.h | 16 ++++---- src/video_core/command_classes/codecs/vp9.cpp | 2 +- src/video_core/command_classes/codecs/vp9.h | 2 +- src/video_core/dma_pusher.cpp | 9 +++- src/video_core/dma_pusher.h | 14 +++---- src/video_core/engines/engine_upload.cpp | 8 ++-- src/video_core/engines/engine_upload.h | 4 +- src/video_core/engines/kepler_memory.cpp | 4 +- src/video_core/engines/kepler_memory.h | 2 +- src/video_core/engines/maxwell_dma.cpp | 6 ++- src/video_core/engines/maxwell_dma.h | 4 +- src/video_core/fence_manager.h | 8 ++-- src/video_core/gpu.cpp | 8 +++- src/video_core/gpu.h | 12 +++--- src/video_core/gpu_asynch.cpp | 4 +- src/video_core/gpu_asynch.h | 2 +- src/video_core/gpu_synch.cpp | 2 +- src/video_core/gpu_synch.h | 2 +- src/video_core/gpu_thread.cpp | 16 ++++---- src/video_core/gpu_thread.h | 25 ++++++----- src/video_core/guest_driver.h | 4 +- src/video_core/macro/macro_hle.cpp | 6 +-- src/video_core/macro/macro_hle.h | 2 +- src/video_core/macro/macro_interpreter.cpp | 20 ++++----- src/video_core/macro/macro_interpreter.h | 10 ++--- src/video_core/macro/macro_jit_x64.cpp | 16 ++++---- src/video_core/macro/macro_jit_x64.h | 4 +- src/video_core/memory_manager.h | 4 +- .../renderer_opengl/gl_arb_decompiler.cpp | 48 +++++++++++----------- .../renderer_opengl/gl_fence_manager.cpp | 14 +++---- src/video_core/renderer_opengl/gl_fence_manager.h | 10 ++--- src/video_core/renderer_opengl/gl_query_cache.cpp | 15 +++---- src/video_core/renderer_opengl/gl_query_cache.h | 6 +-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 6 +-- src/video_core/renderer_opengl/gl_shader_cache.h | 2 +- .../renderer_opengl/gl_shader_decompiler.cpp | 35 ++++++++-------- .../renderer_opengl/gl_shader_decompiler.h | 12 +++--- .../renderer_opengl/gl_texture_cache.cpp | 20 ++++----- 
src/video_core/renderer_opengl/gl_texture_cache.h | 5 ++- src/video_core/renderer_vulkan/vk_command_pool.cpp | 4 +- src/video_core/renderer_vulkan/vk_command_pool.h | 2 +- 42 files changed, 219 insertions(+), 206 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index b60f86260..e3e7432f7 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -29,8 +29,8 @@ #include "video_core/memory_manager.h" namespace Tegra { -CDmaPusher::CDmaPusher(GPU& gpu) - : gpu(gpu), nvdec_processor(std::make_shared(gpu)), +CDmaPusher::CDmaPusher(GPU& gpu_) + : gpu{gpu_}, nvdec_processor(std::make_shared(gpu)), vic_processor(std::make_unique(gpu, nvdec_processor)), host1x_processor(std::make_unique(gpu)), nvdec_sync(std::make_unique(gpu)), @@ -100,11 +100,11 @@ void CDmaPusher::Step() { } } -void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { +void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { switch (current_class) { case ChClassId::NvDec: - ThiStateWrite(nvdec_thi_state, offset, {data}); - switch (static_cast(offset)) { + ThiStateWrite(nvdec_thi_state, state_offset, {data}); + switch (static_cast(state_offset)) { case ThiMethod::IncSyncpt: { LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method"); const auto syncpoint_id = static_cast(data & 0xFF); @@ -120,16 +120,16 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", static_cast(nvdec_thi_state.method_0)); - nvdec_processor->ProcessMethod( - static_cast(nvdec_thi_state.method_0), {data}); + nvdec_processor->ProcessMethod(static_cast(nvdec_thi_state.method_0), + {data}); break; default: break; } break; case ChClassId::GraphicsVic: - ThiStateWrite(vic_thi_state, static_cast(offset), {data}); - switch (static_cast(offset)) { + ThiStateWrite(vic_thi_state, static_cast(state_offset), {data}); + switch (static_cast(state_offset)) { case 
ThiMethod::IncSyncpt: { LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method"); const auto syncpoint_id = static_cast(data & 0xFF); @@ -145,8 +145,7 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})", static_cast(vic_thi_state.method_0), data); - vic_processor->ProcessMethod(static_cast(vic_thi_state.method_0), - {data}); + vic_processor->ProcessMethod(static_cast(vic_thi_state.method_0), {data}); break; default: break; @@ -155,7 +154,7 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { case ChClassId::Host1x: // This device is mainly for syncpoint synchronization LOG_DEBUG(Service_NVDRV, "Host1X Class Method"); - host1x_processor->ProcessMethod(static_cast(offset), {data}); + host1x_processor->ProcessMethod(static_cast(state_offset), {data}); break; default: UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast(current_class)); @@ -163,9 +162,10 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { } } -void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector& arguments) { - u8* const state_offset = reinterpret_cast(&state) + sizeof(u32) * offset; - std::memcpy(state_offset, arguments.data(), sizeof(u32) * arguments.size()); +void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 state_offset, + const std::vector& arguments) { + u8* const state_offset_ptr = reinterpret_cast(&state) + sizeof(u32) * state_offset; + std::memcpy(state_offset_ptr, arguments.data(), sizeof(u32) * arguments.size()); } } // namespace Tegra diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 982f309c5..0db1cd646 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h @@ -68,8 +68,8 @@ struct ChCommand { std::vector arguments; }; -using ChCommandHeaderList = std::vector; -using ChCommandList = std::vector; +using ChCommandHeaderList = std::vector; +using ChCommandList = std::vector; struct 
ThiRegisters { u32_le increment_syncpt{}; @@ -96,7 +96,7 @@ enum class ThiMethod : u32 { class CDmaPusher { public: - explicit CDmaPusher(GPU& gpu); + explicit CDmaPusher(GPU& gpu_); ~CDmaPusher(); /// Push NVDEC command buffer entries into queue @@ -109,17 +109,17 @@ public: void Step(); /// Invoke command class devices to execute the command based on the current state - void ExecuteCommand(u32 offset, u32 data); + void ExecuteCommand(u32 state_offset, u32 data); private: /// Write arguments value to the ThiRegisters member at the specified offset - void ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector& arguments); + void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector& arguments); GPU& gpu; - std::shared_ptr nvdec_processor; - std::unique_ptr vic_processor; - std::unique_ptr host1x_processor; + std::shared_ptr nvdec_processor; + std::unique_ptr vic_processor; + std::unique_ptr host1x_processor; std::unique_ptr nvdec_sync; std::unique_ptr vic_sync; ChClassId current_class{}; diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 7d8d6ee3c..1771bc939 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -233,7 +233,7 @@ constexpr std::array map_lut{ } } // Anonymous namespace -VP9::VP9(GPU& gpu) : gpu(gpu) {} +VP9::VP9(GPU& gpu_) : gpu{gpu_} {} VP9::~VP9() = default; diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h index 9ebbbf59e..8396c8105 100644 --- a/src/video_core/command_classes/codecs/vp9.h +++ b/src/video_core/command_classes/codecs/vp9.h @@ -108,7 +108,7 @@ private: class VP9 { public: - explicit VP9(GPU& gpu); + explicit VP9(GPU& gpu_); ~VP9(); VP9(const VP9&) = delete; diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index d8801b1f5..2c8b20024 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp 
@@ -13,7 +13,7 @@ namespace Tegra { -DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} +DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_) : gpu{gpu_}, system{system_} {} DmaPusher::~DmaPusher() = default; @@ -152,7 +152,12 @@ void DmaPusher::SetState(const CommandHeader& command_header) { void DmaPusher::CallMethod(u32 argument) const { if (dma_state.method < non_puller_methods) { - gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count}); + gpu.CallMethod(GPU::MethodCall{ + dma_state.method, + argument, + dma_state.subchannel, + dma_state.method_count, + }); } else { subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument, dma_state.is_last_call); diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 96ac267f7..19f286fa7 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -87,11 +87,11 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub struct CommandList final { CommandList() = default; explicit CommandList(std::size_t size) : command_lists(size) {} - explicit CommandList(std::vector&& prefetch_command_list) - : prefetch_command_list{std::move(prefetch_command_list)} {} + explicit CommandList(std::vector&& prefetch_command_list_) + : prefetch_command_list{std::move(prefetch_command_list_)} {} - std::vector command_lists; - std::vector prefetch_command_list; + std::vector command_lists; + std::vector prefetch_command_list; }; /** @@ -103,7 +103,7 @@ struct CommandList final { */ class DmaPusher final { public: - explicit DmaPusher(Core::System& system, GPU& gpu); + explicit DmaPusher(Core::System& system_, GPU& gpu_); ~DmaPusher(); void Push(CommandList&& entries) { @@ -112,7 +112,7 @@ public: void DispatchCalls(); - void BindSubchannel(Tegra::Engines::EngineInterface* engine, u32 subchannel_id) { + void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id) { 
subchannels[subchannel_id] = engine; } @@ -145,7 +145,7 @@ private: bool ib_enable{true}; ///< IB mode enabled - std::array subchannels{}; + std::array subchannels{}; GPU& gpu; Core::System& system; diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index d44ad0cd8..71d7e1473 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -11,16 +11,16 @@ namespace Tegra::Engines::Upload { -State::State(MemoryManager& memory_manager, Registers& regs) - : regs{regs}, memory_manager{memory_manager} {} +State::State(MemoryManager& memory_manager_, Registers& regs_) + : regs{regs_}, memory_manager{memory_manager_} {} State::~State() = default; -void State::ProcessExec(const bool is_linear) { +void State::ProcessExec(const bool is_linear_) { write_offset = 0; copy_size = regs.line_length_in * regs.line_count; inner_buffer.resize(copy_size); - this->is_linear = is_linear; + is_linear = is_linear_; } void State::ProcessData(const u32 data, const bool is_last_call) { diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 462da419e..1c7f1effa 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -54,10 +54,10 @@ struct Registers { class State { public: - State(MemoryManager& memory_manager, Registers& regs); + explicit State(MemoryManager& memory_manager_, Registers& regs_); ~State(); - void ProcessExec(bool is_linear); + void ProcessExec(bool is_linear_); void ProcessData(u32 data, bool is_last_call); private: diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index dc71b2eec..9911140e9 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -14,8 +14,8 @@ namespace Tegra::Engines { -KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) - : system{system}, 
upload_state{memory_manager, regs.upload} {} +KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager) + : system{system_}, upload_state{memory_manager, regs.upload} {} KeplerMemory::~KeplerMemory() = default; diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 5b7f71a00..62483589e 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -35,7 +35,7 @@ namespace Tegra::Engines { class KeplerMemory final : public EngineInterface { public: - KeplerMemory(Core::System& system, MemoryManager& memory_manager); + explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager); ~KeplerMemory(); /// Write the value to the register identified by method. diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 8fa359d0a..1c29e895e 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -16,8 +16,10 @@ namespace Tegra::Engines { using namespace Texture; -MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) - : system{system}, memory_manager{memory_manager} {} +MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_) + : system{system_}, memory_manager{memory_manager_} {} + +MaxwellDMA::~MaxwellDMA() = default; void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 50f445efc..17bd280c4 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -185,8 +185,8 @@ public: }; static_assert(sizeof(RemapConst) == 12); - explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); - ~MaxwellDMA() = default; + explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_); + ~MaxwellDMA(); /// 
Write the value to the register identified by method. void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index de6991ef6..c5f26896e 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -17,11 +17,11 @@ namespace VideoCommon { class FenceBase { public: - FenceBase(u32 payload, bool is_stubbed) - : address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {} + explicit FenceBase(u32 payload_, bool is_stubbed_) + : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {} - FenceBase(GPUVAddr address, u32 payload, bool is_stubbed) - : address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {} + explicit FenceBase(GPUVAddr address_, u32 payload_, bool is_stubbed_) + : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {} GPUVAddr GetAddress() const { return address; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index e91f52938..964b3f3dc 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -232,8 +232,12 @@ void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); } else { for (std::size_t i = 0; i < amount; i++) { - CallPullerMethod( - {method, base_start[i], subchannel, methods_pending - static_cast(i)}); + CallPullerMethod(MethodCall{ + method, + base_start[i], + subchannel, + methods_pending - static_cast(i), + }); } } } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 21410e125..660641d04 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -149,16 +149,16 @@ public: u32 subchannel{}; u32 method_count{}; - MethodCall(u32 method, u32 argument, u32 subchannel = 0, u32 method_count = 0) - : method(method), argument(argument), subchannel(subchannel), - method_count(method_count) {} + explicit 
MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0) + : method(method_), argument(argument_), subchannel(subchannel_), + method_count(method_count_) {} [[nodiscard]] bool IsLastCall() const { return method_count <= 1; } }; - explicit GPU(Core::System& system, bool is_async, bool use_nvdec); + explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_); virtual ~GPU(); /// Binds a renderer to the GPU. @@ -414,8 +414,8 @@ private: std::condition_variable sync_cv; struct FlushRequest { - FlushRequest(u64 fence, VAddr addr, std::size_t size) - : fence{fence}, addr{addr}, size{size} {} + explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_) + : fence{fence_}, addr{addr_}, size{size_} {} u64 fence; VAddr addr; std::size_t size; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index a9baaf7ef..6cc091ecd 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -10,8 +10,8 @@ namespace VideoCommon { -GPUAsynch::GPUAsynch(Core::System& system, bool use_nvdec) - : GPU{system, true, use_nvdec}, gpu_thread{system} {} +GPUAsynch::GPUAsynch(Core::System& system_, bool use_nvdec_) + : GPU{system_, true, use_nvdec_}, gpu_thread{system_} {} GPUAsynch::~GPUAsynch() = default; diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 0c0872e73..a384113f4 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -20,7 +20,7 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU asynchronously class GPUAsynch final : public Tegra::GPU { public: - explicit GPUAsynch(Core::System& system, bool use_nvdec); + explicit GPUAsynch(Core::System& system_, bool use_nvdec_); ~GPUAsynch() override; void Start() override; diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index ecf7bbdf3..1e9d4b9b2 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -7,7 +7,7 @@ namespace VideoCommon { 
-GPUSynch::GPUSynch(Core::System& system, bool use_nvdec) : GPU{system, false, use_nvdec} {} +GPUSynch::GPUSynch(Core::System& system_, bool use_nvdec_) : GPU{system_, false, use_nvdec_} {} GPUSynch::~GPUSynch() = default; diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 9d778c71a..c5904b8db 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -19,7 +19,7 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU synchronously class GPUSynch final : public Tegra::GPU { public: - explicit GPUSynch(Core::System& system, bool use_nvdec); + explicit GPUSynch(Core::System& system_, bool use_nvdec_); ~GPUSynch() override; void Start() override; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 4b8f58283..e27218b96 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -39,23 +39,23 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, CommandDataContainer next; while (state.is_running) { next = state.queue.PopWait(); - if (const auto submit_list = std::get_if(&next.data)) { + if (auto* submit_list = std::get_if(&next.data)) { dma_pusher.Push(std::move(submit_list->entries)); dma_pusher.DispatchCalls(); - } else if (const auto command_list = std::get_if(&next.data)) { + } else if (auto* command_list = std::get_if(&next.data)) { // NVDEC cdma_pusher.Push(std::move(command_list->entries)); cdma_pusher.DispatchCalls(); - } else if (const auto data = std::get_if(&next.data)) { + } else if (const auto* data = std::get_if(&next.data)) { renderer.SwapBuffers(data->framebuffer ? 
&*data->framebuffer : nullptr); } else if (std::holds_alternative(next.data)) { renderer.Rasterizer().ReleaseFences(); } else if (std::holds_alternative(next.data)) { system.GPU().TickWork(); - } else if (const auto data = std::get_if(&next.data)) { - renderer.Rasterizer().FlushRegion(data->addr, data->size); - } else if (const auto data = std::get_if(&next.data)) { - renderer.Rasterizer().OnCPUWrite(data->addr, data->size); + } else if (const auto* flush = std::get_if(&next.data)) { + renderer.Rasterizer().FlushRegion(flush->addr, flush->size); + } else if (const auto* invalidate = std::get_if(&next.data)) { + renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size); } else if (std::holds_alternative(next.data)) { return; } else { @@ -65,7 +65,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, } } -ThreadManager::ThreadManager(Core::System& system) : system{system} {} +ThreadManager::ThreadManager(Core::System& system_) : system{system_} {} ThreadManager::~ThreadManager() { if (!thread.joinable()) { diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 32a34e3a7..f1c52cd9e 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -32,30 +32,30 @@ struct EndProcessingCommand final {}; /// Command to signal to the GPU thread that a command list is ready for processing struct SubmitListCommand final { - explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} + explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {} Tegra::CommandList entries; }; /// Command to signal to the GPU thread that a cdma command list is ready for processing struct SubmitChCommandEntries final { - explicit SubmitChCommandEntries(Tegra::ChCommandHeaderList&& entries) - : entries{std::move(entries)} {} + explicit SubmitChCommandEntries(Tegra::ChCommandHeaderList&& entries_) + : entries{std::move(entries_)} {} Tegra::ChCommandHeaderList 
entries; }; /// Command to signal to the GPU thread that a swap buffers is pending struct SwapBuffersCommand final { - explicit SwapBuffersCommand(std::optional framebuffer) - : framebuffer{std::move(framebuffer)} {} + explicit SwapBuffersCommand(std::optional framebuffer_) + : framebuffer{std::move(framebuffer_)} {} std::optional framebuffer; }; /// Command to signal to the GPU thread to flush a region struct FlushRegionCommand final { - explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} + explicit constexpr FlushRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} VAddr addr; u64 size; @@ -63,7 +63,7 @@ struct FlushRegionCommand final { /// Command to signal to the GPU thread to invalidate a region struct InvalidateRegionCommand final { - explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} + explicit constexpr InvalidateRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} VAddr addr; u64 size; @@ -71,8 +71,8 @@ struct InvalidateRegionCommand final { /// Command to signal to the GPU thread to flush and invalidate a region struct FlushAndInvalidateRegionCommand final { - explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) - : addr{addr}, size{size} {} + explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr_, u64 size_) + : addr{addr_}, size{size_} {} VAddr addr; u64 size; @@ -92,8 +92,8 @@ using CommandData = struct CommandDataContainer { CommandDataContainer() = default; - CommandDataContainer(CommandData&& data, u64 next_fence) - : data{std::move(data)}, fence{next_fence} {} + explicit CommandDataContainer(CommandData&& data_, u64 next_fence_) + : data{std::move(data_)}, fence{next_fence_} {} CommandData data; u64 fence{}; @@ -112,7 +112,7 @@ struct SynchState final { /// Class used to manage the GPU thread class ThreadManager final { public: - explicit ThreadManager(Core::System& system); + explicit ThreadManager(Core::System& 
system_); ~ThreadManager(); /// Creates and starts the GPU thread. @@ -146,7 +146,6 @@ private: /// Pushes a command to be executed by the GPU thread u64 PushCommand(CommandData&& command_data); -private: SynchState state; Core::System& system; std::thread thread; diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h index 99450777e..21e569ba1 100644 --- a/src/video_core/guest_driver.h +++ b/src/video_core/guest_driver.h @@ -19,8 +19,8 @@ namespace VideoCore { class GuestDriverProfile { public: explicit GuestDriverProfile() = default; - explicit GuestDriverProfile(std::optional texture_handler_size) - : texture_handler_size{texture_handler_size} {} + explicit GuestDriverProfile(std::optional texture_handler_size_) + : texture_handler_size{texture_handler_size_} {} void DeduceTextureHandlerSize(std::vector bound_offsets); diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index df00b57df..70ac7c620 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -85,7 +85,7 @@ constexpr std::array, 3> hle_funcs{{ {0x0217920100488FF7, &HLE_0217920100488FF7}, }}; -HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} HLEMacro::~HLEMacro() = default; std::optional> HLEMacro::GetHLEProgram(u64 hash) const { @@ -99,8 +99,8 @@ std::optional> HLEMacro::GetHLEProgram(u64 hash) co HLEMacroImpl::~HLEMacroImpl() = default; -HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func) - : maxwell3d(maxwell3d), func(func) {} +HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) + : maxwell3d{maxwell3d_}, func{func_} {} void HLEMacroImpl::Execute(const std::vector& parameters, u32 method) { func(maxwell3d, parameters); diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h index 37af875a0..cb3bd1600 100644 --- 
a/src/video_core/macro/macro_hle.h +++ b/src/video_core/macro/macro_hle.h @@ -20,7 +20,7 @@ using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector> GetHLEProgram(u64 hash) const; diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index bd01fd1f2..44a71aa6c 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -11,29 +11,29 @@ MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); namespace Tegra { -MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) - : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) + : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} std::unique_ptr MacroInterpreter::Compile(const std::vector& code) { return std::make_unique(maxwell3d, code); } -MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, - const std::vector& code) - : maxwell3d(maxwell3d), code(code) {} +MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, + const std::vector& code_) + : maxwell3d{maxwell3d_}, code{code_} {} -void MacroInterpreterImpl::Execute(const std::vector& parameters, u32 method) { +void MacroInterpreterImpl::Execute(const std::vector& params, u32 method) { MICROPROFILE_SCOPE(MacroInterp); Reset(); - registers[1] = parameters[0]; - num_parameters = parameters.size(); + registers[1] = params[0]; + num_parameters = params.size(); if (num_parameters > parameters_capacity) { parameters_capacity = num_parameters; - this->parameters = std::make_unique(num_parameters); + parameters = std::make_unique(num_parameters); } - std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32)); + std::memcpy(parameters.get(), params.data(), num_parameters * sizeof(u32)); // Execute the code until we hit an exit condition. 
bool keep_executing = true; diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h index 90217fc89..d50c619ce 100644 --- a/src/video_core/macro/macro_interpreter.h +++ b/src/video_core/macro/macro_interpreter.h @@ -17,7 +17,7 @@ class Maxwell3D; class MacroInterpreter final : public MacroEngine { public: - explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d); + explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d_); protected: std::unique_ptr Compile(const std::vector& code) override; @@ -28,8 +28,8 @@ private: class MacroInterpreterImpl : public CachedMacro { public: - MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector& code); - void Execute(const std::vector& parameters, u32 method) override; + explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_); + void Execute(const std::vector& params, u32 method) override; private: /// Resets the execution engine state, zeroing registers, etc. @@ -38,9 +38,9 @@ private: /** * Executes a single macro instruction located at the current program counter. Returns whether * the interpreter should keep running. - * @param offset Offset to start execution at. + * * @param is_delay_slot Whether the current step is being executed due to a delay slot in a - * previous instruction. + * previous instruction. 
*/ bool Step(bool is_delay_slot); diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 954b87515..c82bb987f 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -28,15 +28,15 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ BRANCH_HOLDER, }); -MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) - : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) + : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} std::unique_ptr MacroJITx64::Compile(const std::vector& code) { return std::make_unique(maxwell3d, code); } -MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector& code) - : Xbyak::CodeGenerator(MAX_CODE_SIZE), code(code), maxwell3d(maxwell3d) { +MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_) + : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { Compile(); } @@ -553,15 +553,15 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { } void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { - const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) { + const auto SetRegister = [this](u32 reg_index, const Xbyak::Reg32& result) { // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero // register. 
- if (reg == 0) { + if (reg_index == 0) { return; } - mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); + mov(dword[STATE + offsetof(JITState, registers) + reg_index * sizeof(u32)], result); }; - const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); }; + const auto SetMethodAddress = [this](const Xbyak::Reg32& reg32) { mov(METHOD_ADDRESS, reg32); }; switch (operation) { case Macro::ResultOperation::IgnoreAndFetch: diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index a180e7428..7f50ac2f8 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -23,7 +23,7 @@ constexpr size_t MAX_CODE_SIZE = 0x10000; class MacroJITx64 final : public MacroEngine { public: - explicit MacroJITx64(Engines::Maxwell3D& maxwell3d); + explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); protected: std::unique_ptr Compile(const std::vector& code) override; @@ -34,7 +34,7 @@ private: class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { public: - MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector& code); + explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_); ~MacroJITx64Impl(); void Execute(const std::vector& parameters, u32 method) override; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index c078193d9..c35e57689 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -28,7 +28,7 @@ public: }; constexpr PageEntry() = default; - constexpr PageEntry(State state) : state{state} {} + constexpr PageEntry(State state_) : state{state_} {} constexpr PageEntry(VAddr addr) : state{static_cast(addr >> ShiftBits)} {} [[nodiscard]] constexpr bool IsUnmapped() const { @@ -68,7 +68,7 @@ static_assert(sizeof(PageEntry) == 4, "PageEntry is too large"); class MemoryManager final { public: - explicit MemoryManager(Core::System& system); + 
explicit MemoryManager(Core::System& system_); ~MemoryManager(); /// Binds a renderer to the memory manager. diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index d6120c23e..5378c398e 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -187,8 +187,8 @@ std::string TextureType(const MetaTexture& meta) { class ARBDecompiler final { public: - explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, - ShaderType stage, std::string_view identifier); + explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, + ShaderType stage_, std::string_view identifier); std::string Code() const { return shader_source; @@ -802,9 +802,9 @@ private: }; }; -ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, - ShaderType stage, std::string_view identifier) - : device{device}, ir{ir}, registry{registry}, stage{stage} { +ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, + ShaderType stage_, std::string_view identifier) + : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { DefineGlobalMemory(); AddLine("TEMP RC;"); @@ -1134,44 +1134,44 @@ void ARBDecompiler::VisitAST(const ASTNode& node) { for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { VisitAST(current); } - } else if (const auto ast = std::get_if(&*node->GetInnerData())) { - const std::string condition = VisitExpression(ast->condition); + } else if (const auto if_then = std::get_if(&*node->GetInnerData())) { + const std::string condition = VisitExpression(if_then->condition); ResetTemporaries(); AddLine("MOVC.U RC.x, {};", condition); AddLine("IF NE.x;"); - for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { + for (ASTNode current = 
if_then->nodes.GetFirst(); current; current = current->GetNext()) { VisitAST(current); } AddLine("ENDIF;"); - } else if (const auto ast = std::get_if(&*node->GetInnerData())) { + } else if (const auto if_else = std::get_if(&*node->GetInnerData())) { AddLine("ELSE;"); - for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { + for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) { VisitAST(current); } - } else if (const auto ast = std::get_if(&*node->GetInnerData())) { - VisitBlock(ast->nodes); - } else if (const auto ast = std::get_if(&*node->GetInnerData())) { - AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition)); + } else if (const auto decoded = std::get_if(&*node->GetInnerData())) { + VisitBlock(decoded->nodes); + } else if (const auto var_set = std::get_if(&*node->GetInnerData())) { + AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition)); ResetTemporaries(); - } else if (const auto ast = std::get_if(&*node->GetInnerData())) { - const std::string condition = VisitExpression(ast->condition); + } else if (const auto do_while = std::get_if(&*node->GetInnerData())) { + const std::string condition = VisitExpression(do_while->condition); ResetTemporaries(); AddLine("REP;"); - for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { + for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) { VisitAST(current); } AddLine("MOVC.U RC.x, {};", condition); AddLine("BRK (NE.x);"); AddLine("ENDREP;"); - } else if (const auto ast = std::get_if(&*node->GetInnerData())) { - const bool is_true = ExprIsTrue(ast->condition); + } else if (const auto ast_return = std::get_if(&*node->GetInnerData())) { + const bool is_true = ExprIsTrue(ast_return->condition); if (!is_true) { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); + AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition)); 
AddLine("IF NE.x;"); ResetTemporaries(); } - if (ast->kills) { + if (ast_return->kills) { AddLine("KIL TR;"); } else { Exit(); @@ -1179,11 +1179,11 @@ void ARBDecompiler::VisitAST(const ASTNode& node) { if (!is_true) { AddLine("ENDIF;"); } - } else if (const auto ast = std::get_if(&*node->GetInnerData())) { - if (ExprIsTrue(ast->condition)) { + } else if (const auto ast_break = std::get_if(&*node->GetInnerData())) { + if (ExprIsTrue(ast_break->condition)) { AddLine("BRK;"); } else { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); + AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition)); AddLine("BRK (NE.x);"); ResetTemporaries(); } diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index b532fdcc2..6040646cb 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -11,10 +11,10 @@ namespace OpenGL { -GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) : FenceBase(payload, is_stubbed) {} +GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {} -GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed) - : FenceBase(address, payload, is_stubbed) {} +GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_) + : FenceBase{address_, payload_, is_stubbed_} {} GLInnerFence::~GLInnerFence() = default; @@ -45,10 +45,10 @@ void GLInnerFence::Wait() { glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED); } -FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, - TextureCacheOpenGL& texture_cache, - OGLBufferCache& buffer_cache, QueryCache& query_cache) - : GenericFenceManager{rasterizer, gpu, texture_cache, buffer_cache, query_cache} {} +FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, + Tegra::GPU& gpu_, TextureCacheOpenGL& texture_cache_, + 
OGLBufferCache& buffer_cache_, QueryCache& query_cache_) + : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { return std::make_shared(value, is_stubbed); diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index da1dcdace..39ca6125b 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h @@ -17,8 +17,8 @@ namespace OpenGL { class GLInnerFence : public VideoCommon::FenceBase { public: - GLInnerFence(u32 payload, bool is_stubbed); - GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed); + explicit GLInnerFence(u32 payload_, bool is_stubbed_); + explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_); ~GLInnerFence(); void Queue(); @@ -37,9 +37,9 @@ using GenericFenceManager = class FenceManagerOpenGL final : public GenericFenceManager { public: - explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, - TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache, - QueryCache& query_cache); + explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, + TextureCacheOpenGL& texture_cache_, OGLBufferCache& buffer_cache_, + QueryCache& query_cache_); protected: Fence CreateFence(u32 value, bool is_stubbed) override; diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 1a3d9720e..bcc37471f 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -59,10 +59,10 @@ bool QueryCache::AnyCommandQueued() const noexcept { return gl_rasterizer.AnyCommandQueued(); } -HostCounter::HostCounter(QueryCache& cache, std::shared_ptr dependency, - VideoCore::QueryType type) - : VideoCommon::HostCounterBase{std::move(dependency)}, 
cache{cache}, - type{type}, query{cache.AllocateQuery(type)} { +HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr dependency, + VideoCore::QueryType type_) + : HostCounterBase{std::move(dependency)}, cache{cache_}, type{type_}, + query{cache.AllocateQuery(type)} { glBeginQuery(GetTarget(type), query.handle); } @@ -86,13 +86,14 @@ u64 HostCounter::BlockingQuery() const { return static_cast(value); } -CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr) - : VideoCommon::CachedQueryBase{cpu_addr, host_ptr}, cache{&cache}, type{type} {} +CachedQuery::CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr, + u8* host_ptr) + : CachedQueryBase{cpu_addr, host_ptr}, cache{&cache_}, type{type_} {} CachedQuery::~CachedQuery() = default; CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept - : VideoCommon::CachedQueryBase(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} + : CachedQueryBase(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { cache = rhs.cache; diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 82cac51ee..d9851e880 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -46,8 +46,8 @@ private: class HostCounter final : public VideoCommon::HostCounterBase { public: - explicit HostCounter(QueryCache& cache, std::shared_ptr dependency, - VideoCore::QueryType type); + explicit HostCounter(QueryCache& cache_, std::shared_ptr dependency, + VideoCore::QueryType type_); ~HostCounter(); void EndQuery(); @@ -62,7 +62,7 @@ private: class CachedQuery final : public VideoCommon::CachedQueryBase { public: - explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, + explicit CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr, u8* host_ptr); ~CachedQuery() 
override; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index bd56bed0c..9f2c0a222 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -198,10 +198,10 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u return program; } -Shader::Shader(std::shared_ptr registry_, ShaderEntries entries_, - ProgramSharedPtr program_, bool is_built) +Shader::Shader(std::shared_ptr registry_, ShaderEntries entries_, + ProgramSharedPtr program_, bool is_built_) : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, - is_built(is_built) { + is_built{is_built_} { handle = program->assembly_program.handle; if (handle == 0) { handle = program->source_program.handle; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 1708af06a..ab5374fac 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -108,7 +108,7 @@ public: private: explicit Shader(std::shared_ptr registry, ShaderEntries entries, - ProgramSharedPtr program, bool is_built = true); + ProgramSharedPtr program, bool is_built_ = true); std::shared_ptr registry; ShaderEntries entries; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 95ca96c8e..0940969ba 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -131,7 +131,7 @@ private: class Expression final { public: - Expression(std::string code, Type type) : code{std::move(code)}, type{type} { + Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} { ASSERT(type != Type::Void); } Expression() : type{Type::Void} {} @@ -148,8 +148,8 @@ public: ASSERT(type == 
Type::Void); } - std::string As(Type type) const { - switch (type) { + std::string As(Type type_) const { + switch (type_) { case Type::Bool: return AsBool(); case Type::Bool2: @@ -418,11 +418,12 @@ struct GenericVaryingDescription { class GLSLDecompiler final { public: - explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, - ShaderType stage, std::string_view identifier, std::string_view suffix) - : device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier}, - suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{ - UseUnifiedUniforms(device, ir, stage)} { + explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, + ShaderType stage_, std::string_view identifier_, + std::string_view suffix_) + : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_}, + suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{ + UseUnifiedUniforms(device_, ir_, stage_)} { if (stage != ShaderType::Compute) { transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); } @@ -777,16 +778,16 @@ private: name = "gs_" + name + "[]"; } - std::string suffix; + std::string suffix_; if (stage == ShaderType::Fragment) { const auto input_mode{header.ps.GetPixelImap(location)}; if (input_mode == PixelImap::Unused) { return; } - suffix = GetInputFlags(input_mode); + suffix_ = GetInputFlags(input_mode); } - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name); + code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name); } void DeclareOutputAttributes() { @@ -2100,13 +2101,13 @@ private: const auto type = meta.sampler.is_shadow ? 
Type::Float : Type::Int; const bool separate_dc = meta.sampler.is_shadow; - std::vector ir; + std::vector ir_; if (meta.sampler.is_shadow) { - ir = {TextureOffset{}}; + ir_ = {TextureOffset{}}; } else { - ir = {TextureOffset{}, TextureArgument{type, meta.component}}; + ir_ = {TextureOffset{}, TextureArgument{type, meta.component}}; } - return {GenerateTexture(operation, "Gather", ir, separate_dc) + GetSwizzle(meta.element), + return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element), Type::Float}; } @@ -2801,7 +2802,7 @@ std::string GetFlowVariable(u32 index) { class ExprDecompiler { public: - explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} + explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} void operator()(const ExprAnd& expr) { inner += '('; @@ -2856,7 +2857,7 @@ private: class ASTDecompiler { public: - explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} + explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} void operator()(const ASTProgram& ast) { ASTNode current = ast.nodes.GetFirst(); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 451c9689a..f5a5249f2 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -25,8 +25,8 @@ using ImageEntry = VideoCommon::Shader::Image; class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { public: - explicit ConstBufferEntry(u32 max_offset, bool is_indirect, u32 index) - : VideoCommon::Shader::ConstBuffer{max_offset, is_indirect}, index{index} {} + explicit ConstBufferEntry(u32 max_offset, bool is_indirect, u32 index_) + : ConstBuffer{max_offset, is_indirect}, index{index_} {} u32 GetIndex() const { return index; @@ -37,10 +37,10 @@ private: }; struct GlobalMemoryEntry { - constexpr explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read, - bool 
is_written) - : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{ - is_written} {} + constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_, + bool is_written_) + : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{ + is_written_} {} u32 cbuf_index = 0; u32 cbuf_offset = 0; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a863ef218..a59fe853e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -258,9 +258,9 @@ constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, Swiz } // Anonymous namespace -CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, - bool is_astc_supported) - : VideoCommon::SurfaceBase(gpu_addr, params, is_astc_supported) { +CachedSurface::CachedSurface(const GPUVAddr gpu_addr_, const SurfaceParams& params_, + bool is_astc_supported_) + : SurfaceBase{gpu_addr_, params_, is_astc_supported_} { if (is_converted) { internal_format = params.srgb_conversion ? 
GL_SRGB8_ALPHA8 : GL_RGBA8; format = GL_RGBA; @@ -419,11 +419,11 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr return view; } -CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, - bool is_proxy) - : VideoCommon::ViewBase(params), surface{surface}, format{surface.internal_format}, - target{GetTextureTarget(params.target)}, is_proxy{is_proxy} { - if (!is_proxy) { +CachedSurfaceView::CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, + bool is_proxy_) + : ViewBase{params_}, surface{surface_}, format{surface_.internal_format}, + target{GetTextureTarget(params_.target)}, is_proxy{is_proxy_} { + if (!is_proxy_) { main_view = CreateTextureView(); } } @@ -493,13 +493,13 @@ GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_sou std::array swizzle{x_source, y_source, z_source, w_source}; - switch (const PixelFormat format = GetSurfaceParams().pixel_format) { + switch (const PixelFormat pixel_format = GetSurfaceParams().pixel_format) { case PixelFormat::D24_UNORM_S8_UINT: case PixelFormat::D32_FLOAT_S8_UINT: case PixelFormat::S8_UINT_D24_UNORM: UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, - GetComponent(format, x_source == SwizzleSource::R)); + GetComponent(pixel_format, x_source == SwizzleSource::R)); // Make sure we sample the first component std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 7787134fc..76a7b2316 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -37,7 +37,8 @@ class CachedSurface final : public VideoCommon::SurfaceBase { friend CachedSurfaceView; public: - explicit CachedSurface(GPUVAddr gpu_addr, const 
SurfaceParams& params, bool is_astc_supported); + explicit CachedSurface(GPUVAddr gpu_addr_, const SurfaceParams& params_, + bool is_astc_supported_); ~CachedSurface(); void UploadTexture(const std::vector& staging_buffer) override; @@ -77,7 +78,7 @@ private: class CachedSurfaceView final : public VideoCommon::ViewBase { public: - explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); + explicit CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, bool is_proxy_); ~CachedSurfaceView(); /// @brief Attaches this texture view to the currently bound fb_target framebuffer diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp index 6339f4fe0..256a39148 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp @@ -17,8 +17,8 @@ struct CommandPool::Pool { vk::CommandBuffers cmdbufs; }; -CommandPool::CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device) - : ResourcePool(master_semaphore, COMMAND_BUFFER_POOL_SIZE), device{device} {} +CommandPool::CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device_) + : ResourcePool(master_semaphore, COMMAND_BUFFER_POOL_SIZE), device{device_} {} CommandPool::~CommandPool() = default; diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h index b9cb3fb5d..33655eca4 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.h +++ b/src/video_core/renderer_vulkan/vk_command_pool.h @@ -17,7 +17,7 @@ class VKDevice; class CommandPool final : public ResourcePool { public: - explicit CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device); + explicit CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device_); ~CommandPool() override; void Allocate(size_t begin, size_t end) override; -- cgit v1.2.3 From 94af77aa7c26b65365fecc6230c2110c10ff16b5 Mon Sep 
17 00:00:00 2001 From: Lioncash Date: Fri, 4 Dec 2020 16:23:10 -0500 Subject: codec: Remove deprecated usage of AVCodecContext::refcounted_frames This was only necessary for use with the avcodec_decode_video2/avcodec_decode_audio4 APIs which are also deprecated. Given we use avcodec_send_packet/avcodec_receive_frame, this isn't necessary, this is even indicated directly within the FFmpeg API changes document here on 2017-09-26: https://github.com/FFmpeg/FFmpeg/blob/master/doc/APIchanges#L410 This prevents our code from breaking whenever we update to a newer version of FFmpeg in the future if they ever decide to fully remove this API member. --- src/video_core/command_classes/codecs/codec.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 9a88f64e4..f547f5bd4 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -67,7 +67,6 @@ void Codec::Decode() { } av_codec_ctx = avcodec_alloc_context3(av_codec); - av_codec_ctx->refcounted_frames = 1; av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); // TODO(ameerj): libavcodec gpu hw acceleration -- cgit v1.2.3 From 37d672bf08c810e48dd48a08fc33ec28a6c8a6cf Mon Sep 17 00:00:00 2001 From: FearlessTobi Date: Sat, 5 Dec 2020 02:42:50 +0100 Subject: Fix telemetry-related exit crash from use-after-free Co-Authored-By: xperia64 --- src/video_core/renderer_opengl/renderer_opengl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c869bb0e2..1523cd6fa 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -275,9 +275,9 @@ void RendererOpenGL::AddTelemetryFields() { LOG_INFO(Render_OpenGL,
"GL_RENDERER: {}", gpu_model); constexpr auto user_system = Common::Telemetry::FieldType::UserSystem; - telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor); - telemetry_session.AddField(user_system, "GPU_Model", gpu_model); - telemetry_session.AddField(user_system, "GPU_OpenGL_Version", gl_version); + telemetry_session.AddField(user_system, "GPU_Vendor", std::string(gpu_vendor)); + telemetry_session.AddField(user_system, "GPU_Model", std::string(gpu_model)); + telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); } void RendererOpenGL::CreateRasterizer() { -- cgit v1.2.3 From 414a87a4f4570344140d77a7985b4d118b754341 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 5 Dec 2020 04:51:14 -0500 Subject: video_core: Resolve more variable shadowing scenarios pt.2 Migrates the video core code closer to enabling variable shadowing warnings as errors. This primarily sorts out shadowing occurrences within the Vulkan code. --- src/video_core/command_classes/codecs/vp9.cpp | 40 +++++++------- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 4 +- src/video_core/renderer_vulkan/renderer_vulkan.h | 6 +-- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 40 +++++++------- src/video_core/renderer_vulkan/vk_compute_pass.h | 23 ++++---- .../renderer_vulkan/vk_compute_pipeline.cpp | 16 +++--- .../renderer_vulkan/vk_compute_pipeline.h | 8 +-- src/video_core/renderer_vulkan/vk_device.cpp | 16 +++--- .../renderer_vulkan/vk_fence_manager.cpp | 22 ++++---- src/video_core/renderer_vulkan/vk_fence_manager.h | 16 +++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 38 +++++++------- .../renderer_vulkan/vk_graphics_pipeline.h | 14 ++--- src/video_core/renderer_vulkan/vk_image.cpp | 14 ++--- src/video_core/renderer_vulkan/vk_image.h | 4 +- .../renderer_vulkan/vk_memory_manager.cpp | 18 +++---- src/video_core/renderer_vulkan/vk_memory_manager.h | 10 ++-- src/video_core/renderer_vulkan/vk_query_cache.cpp | 30 +++++------ 
src/video_core/renderer_vulkan/vk_query_cache.h | 10 ++-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 17 +++--- src/video_core/renderer_vulkan/vk_rasterizer.h | 12 +++-- .../renderer_vulkan/vk_renderpass_cache.cpp | 2 +- .../renderer_vulkan/vk_renderpass_cache.h | 2 +- .../renderer_vulkan/vk_sampler_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_sampler_cache.h | 2 +- src/video_core/renderer_vulkan/vk_scheduler.h | 2 +- .../renderer_vulkan/vk_shader_decompiler.cpp | 28 +++++----- .../renderer_vulkan/vk_shader_decompiler.h | 8 +-- .../renderer_vulkan/vk_texture_cache.cpp | 61 +++++++++++----------- src/video_core/renderer_vulkan/vk_texture_cache.h | 18 +++---- .../renderer_vulkan/vk_update_descriptor.cpp | 4 +- .../renderer_vulkan/vk_update_descriptor.h | 2 +- src/video_core/renderer_vulkan/wrapper.cpp | 16 +++--- src/video_core/renderer_vulkan/wrapper.h | 25 +++++---- src/video_core/shader/control_flow.cpp | 6 +-- src/video_core/shader/decode/image.cpp | 10 ++-- src/video_core/shader/decode/other.cpp | 10 ++-- src/video_core/texture_cache/surface_base.cpp | 27 +++++----- src/video_core/texture_cache/surface_base.h | 4 +- src/video_core/texture_cache/surface_params.cpp | 14 ++--- 39 files changed, 305 insertions(+), 296 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 1771bc939..59e586695 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -374,43 +374,43 @@ void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { } Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) { - Vp9FrameContainer frame{}; + Vp9FrameContainer current_frame{}; { gpu.SyncGuestHost(); - frame.info = GetVp9PictureInfo(state); - frame.bit_stream.resize(frame.info.bitstream_size); - gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(), - 
frame.info.bitstream_size); + current_frame.info = GetVp9PictureInfo(state); + current_frame.bit_stream.resize(current_frame.info.bitstream_size); + gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(), + current_frame.info.bitstream_size); } // Buffer two frames, saving the last show frame info if (!next_next_frame.bit_stream.empty()) { Vp9FrameContainer temp{ - .info = frame.info, - .bit_stream = std::move(frame.bit_stream), + .info = current_frame.info, + .bit_stream = std::move(current_frame.bit_stream), }; - next_next_frame.info.show_frame = frame.info.last_frame_shown; - frame.info = next_next_frame.info; - frame.bit_stream = std::move(next_next_frame.bit_stream); + next_next_frame.info.show_frame = current_frame.info.last_frame_shown; + current_frame.info = next_next_frame.info; + current_frame.bit_stream = std::move(next_next_frame.bit_stream); next_next_frame = std::move(temp); if (!next_frame.bit_stream.empty()) { Vp9FrameContainer temp2{ - .info = frame.info, - .bit_stream = std::move(frame.bit_stream), + .info = current_frame.info, + .bit_stream = std::move(current_frame.bit_stream), }; - next_frame.info.show_frame = frame.info.last_frame_shown; - frame.info = next_frame.info; - frame.bit_stream = std::move(next_frame.bit_stream); + next_frame.info.show_frame = current_frame.info.last_frame_shown; + current_frame.info = next_frame.info; + current_frame.bit_stream = std::move(next_frame.bit_stream); next_frame = std::move(temp2); } else { - next_frame.info = frame.info; - next_frame.bit_stream = std::move(frame.bit_stream); + next_frame.info = current_frame.info; + next_frame.bit_stream = std::move(current_frame.bit_stream); } } else { - next_next_frame.info = frame.info; - next_next_frame.bit_stream = std::move(frame.bit_stream); + next_next_frame.info = current_frame.info; + next_next_frame.bit_stream = std::move(current_frame.bit_stream); } - return frame; + return current_frame; } std::vector 
VP9::ComposeCompressedHeader() { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index a2173edd2..ea4b7c1e6 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -243,8 +243,8 @@ std::string BuildCommaSeparatedExtensions(std::vector available_ext RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, - std::unique_ptr context) - : RendererBase{emu_window, std::move(context)}, telemetry_session{telemetry_session_}, + std::unique_ptr context_) + : RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_}, cpu_memory{cpu_memory_}, gpu{gpu_} {} RendererVulkan::~RendererVulkan() { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 1044ca124..977b86003 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -45,9 +45,9 @@ struct VKScreenInfo { class RendererVulkan final : public VideoCore::RendererBase { public: explicit RendererVulkan(Core::TelemetrySession& telemtry_session, - Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, - Tegra::GPU& gpu, - std::unique_ptr context); + Core::Frontend::EmuWindow& emu_window, + Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, + std::unique_ptr context_); ~RendererVulkan() override; bool Init() override; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 9637c6059..1ac7e2a30 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -461,15 +461,15 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( return set; } -QuadArrayPass::QuadArrayPass(const VKDevice& device, 
VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKStagingBufferPool& staging_buffer_pool, - VKUpdateDescriptorQueue& update_descriptor_queue) - : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(), +QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKStagingBufferPool& staging_buffer_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_) + : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), BuildQuadArrayPassDescriptorUpdateTemplateEntry(), BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), - scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, - update_descriptor_queue{update_descriptor_queue} {} + scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, + update_descriptor_queue{update_descriptor_queue_} {} QuadArrayPass::~QuadArrayPass() = default; @@ -510,14 +510,14 @@ std::pair QuadArrayPass::Assemble(u32 num_vertices, u32 return {*buffer.handle, 0}; } -Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, - VKUpdateDescriptorQueue& update_descriptor_queue) - : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), +Uint8Pass::Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_) + : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), uint8_pass), - scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, - update_descriptor_queue{update_descriptor_queue} {} + scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, + update_descriptor_queue{update_descriptor_queue_} {} 
Uint8Pass::~Uint8Pass() = default; @@ -555,16 +555,16 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff return {*buffer.handle, 0}; } -QuadIndexedPass::QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKStagingBufferPool& staging_buffer_pool, - VKUpdateDescriptorQueue& update_descriptor_queue) - : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), +QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKStagingBufferPool& staging_buffer_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_) + : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), BuildInputOutputDescriptorUpdateTemplate(), BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), QUAD_INDEXED_SPV), - scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, - update_descriptor_queue{update_descriptor_queue} {} + scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, + update_descriptor_queue{update_descriptor_queue_} {} QuadIndexedPass::~QuadIndexedPass() = default; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index acc94f27e..2dc87902c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -43,10 +43,10 @@ private: class QuadArrayPass final : public VKComputePass { public: - explicit QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKStagingBufferPool& staging_buffer_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); + explicit QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKStagingBufferPool& staging_buffer_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadArrayPass(); std::pair Assemble(u32 
num_vertices, u32 first); @@ -59,9 +59,10 @@ private: class Uint8Pass final : public VKComputePass { public: - explicit Uint8Pass(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); + explicit Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKStagingBufferPool& staging_buffer_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_); ~Uint8Pass(); std::pair Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); @@ -74,10 +75,10 @@ private: class QuadIndexedPass final : public VKComputePass { public: - explicit QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKStagingBufferPool& staging_buffer_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); + explicit QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKStagingBufferPool& staging_buffer_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadIndexedPass(); std::pair Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 9be72dc9b..62f44d6da 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -15,16 +15,16 @@ namespace Vulkan { -VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - const SPIRVShader& shader) - : device{device}, scheduler{scheduler}, entries{shader.entries}, +VKComputePipeline::VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, + const 
SPIRVShader& shader_) + : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, descriptor_set_layout{CreateDescriptorSetLayout()}, - descriptor_allocator{descriptor_pool, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, + descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, + update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, descriptor_template{CreateDescriptorUpdateTemplate()}, - shader_module{CreateShaderModule(shader.code)}, pipeline{CreatePipeline()} {} + shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} VKComputePipeline::~VKComputePipeline() = default; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 6e2f22a4a..49e2113a2 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -17,10 +17,10 @@ class VKUpdateDescriptorQueue; class VKComputePipeline final { public: - explicit VKComputePipeline(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - const SPIRVShader& shader); + explicit VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, + const SPIRVShader& shader_); ~VKComputePipeline(); VkDescriptorSet CommitDescriptorSet(); diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index f34ed6735..ce3846195 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -491,8 +491,8 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT}; for (const auto 
format : astc_formats) { - const auto format_properties{physical.GetFormatProperties(format)}; - if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { + const auto physical_format_properties{physical.GetFormatProperties(format)}; + if ((physical_format_properties.optimalTilingFeatures & format_feature_usage) == 0) { return false; } } @@ -644,8 +644,8 @@ std::vector VKDevice::LoadExtensions() { VkPhysicalDeviceFeatures2KHR features; features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; - VkPhysicalDeviceProperties2KHR properties; - properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + VkPhysicalDeviceProperties2KHR physical_properties; + physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; if (has_khr_shader_float16_int8) { VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features; @@ -670,8 +670,8 @@ std::vector VKDevice::LoadExtensions() { subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT; subgroup_properties.pNext = nullptr; - properties.pNext = &subgroup_properties; - physical.GetProperties2KHR(properties); + physical_properties.pNext = &subgroup_properties; + physical.GetProperties2KHR(physical_properties); is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize; @@ -695,8 +695,8 @@ std::vector VKDevice::LoadExtensions() { VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties; tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; tfb_properties.pNext = nullptr; - properties.pNext = &tfb_properties; - physical.GetProperties2KHR(properties); + physical_properties.pNext = &tfb_properties; + physical.GetProperties2KHR(physical_properties); if (tfb_features.transformFeedback && tfb_features.geometryStreams && tfb_properties.maxTransformFeedbackStreams >= 4 && diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp 
b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 5babbdd0b..0bcaee714 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -14,12 +14,13 @@ namespace Vulkan { -InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed) - : VideoCommon::FenceBase(payload, is_stubbed), device{device}, scheduler{scheduler} {} +InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_, + bool is_stubbed_) + : FenceBase{payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {} -InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address, - u32 payload, bool is_stubbed) - : VideoCommon::FenceBase(address, payload, is_stubbed), device{device}, scheduler{scheduler} {} +InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_, + u32 payload_, bool is_stubbed_) + : FenceBase{address_, payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {} InnerFence::~InnerFence() = default; @@ -71,11 +72,12 @@ bool InnerFence::IsEventSignalled() const { } } -VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, - Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache, - VKBufferCache& buffer_cache, VKQueryCache& query_cache, - const VKDevice& device_, VKScheduler& scheduler_) - : GenericFenceManager(rasterizer, gpu, texture_cache, buffer_cache, query_cache), +VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, + Tegra::MemoryManager& memory_manager_, + VKTextureCache& texture_cache_, VKBufferCache& buffer_cache_, + VKQueryCache& query_cache_, const VKDevice& device_, + VKScheduler& scheduler_) + : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, device{device_}, scheduler{scheduler_} {} Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) { 
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 1547d6d30..c8547cc24 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -28,10 +28,10 @@ class VKTextureCache; class InnerFence : public VideoCommon::FenceBase { public: - explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, - bool is_stubbed); - explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address, - u32 payload, bool is_stubbed); + explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_, + bool is_stubbed_); + explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_, + u32 payload_, bool is_stubbed_); ~InnerFence(); void Queue(); @@ -55,10 +55,10 @@ using GenericFenceManager = class VKFenceManager final : public GenericFenceManager { public: - explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, - Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache, - VKBufferCache& buffer_cache, VKQueryCache& query_cache, - const VKDevice& device, VKScheduler& scheduler); + explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, + Tegra::MemoryManager& memory_manager_, VKTextureCache& texture_cache_, + VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, + const VKDevice& device_, VKScheduler& scheduler_); protected: Fence CreateFence(u32 value, bool is_stubbed) override; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 0e8f9c352..f8a1bcf34 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -71,21 +71,21 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { } // Anonymous namespace 
-VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - VKRenderPassCache& renderpass_cache, - const GraphicsPipelineCacheKey& key, - vk::Span bindings, - const SPIRVProgram& program) - : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()}, - descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, - descriptor_allocator{descriptor_pool, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules( - program)}, - renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)}, - pipeline{CreatePipeline(cache_key.renderpass_params, program)} {} +VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, + VKRenderPassCache& renderpass_cache_, + const GraphicsPipelineCacheKey& key_, + vk::Span bindings_, + const SPIRVProgram& program_) + : device{device_}, scheduler{scheduler_}, cache_key{key_}, hash{cache_key.Hash()}, + descriptor_set_layout{CreateDescriptorSetLayout(bindings_)}, + descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, + update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, + descriptor_template{CreateDescriptorUpdateTemplate(program_)}, modules{CreateShaderModules( + program_)}, + renderpass{renderpass_cache_.GetRenderPass(cache_key.renderpass_params)}, + pipeline{CreatePipeline(cache_key.renderpass_params, program_)} {} VKGraphicsPipeline::~VKGraphicsPipeline() = default; @@ -162,8 +162,8 @@ std::vector VKGraphicsPipeline::CreateShaderModules( .codeSize = 0, }; - std::vector modules; - modules.reserve(Maxwell::MaxShaderStage); + std::vector shader_modules; + 
shader_modules.reserve(Maxwell::MaxShaderStage); for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { const auto& stage = program[i]; if (!stage) { @@ -174,9 +174,9 @@ std::vector VKGraphicsPipeline::CreateShaderModules( ci.codeSize = stage->code.size() * sizeof(u32); ci.pCode = stage->code.data(); - modules.push_back(device.GetLogical().CreateShaderModule(ci)); + shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); } - return modules; + return shader_modules; } vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 58aa35efd..3fb31d55a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -51,13 +51,13 @@ using SPIRVProgram = std::array, Maxwell::MaxShaderSt class VKGraphicsPipeline final { public: - explicit VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - VKRenderPassCache& renderpass_cache, - const GraphicsPipelineCacheKey& key, - vk::Span bindings, - const SPIRVProgram& program); + explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, + VKRenderPassCache& renderpass_cache_, + const GraphicsPipelineCacheKey& key_, + vk::Span bindings_, + const SPIRVProgram& program_); ~VKGraphicsPipeline(); VkDescriptorSet CommitDescriptorSet(); diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp index 1c418ea17..072d14e3b 100644 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ b/src/video_core/renderer_vulkan/vk_image.cpp @@ -13,18 +13,18 @@ namespace Vulkan { -VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, const 
VkImageCreateInfo& image_ci, - VkImageAspectFlags aspect_mask) - : device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask}, - image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} { - UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0, +VKImage::VKImage(const VKDevice& device_, VKScheduler& scheduler_, + const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_) + : device{device_}, scheduler{scheduler_}, format{image_ci_.format}, aspect_mask{aspect_mask_}, + image_num_layers{image_ci_.arrayLayers}, image_num_levels{image_ci_.mipLevels} { + UNIMPLEMENTED_IF_MSG(image_ci_.queueFamilyIndexCount != 0, "Queue family tracking is not implemented"); - image = device.GetLogical().CreateImage(image_ci); + image = device_.GetLogical().CreateImage(image_ci_); const u32 num_ranges = image_num_layers * image_num_levels; barriers.resize(num_ranges); - subrange_states.resize(num_ranges, {{}, image_ci.initialLayout}); + subrange_states.resize(num_ranges, {{}, image_ci_.initialLayout}); } VKImage::~VKImage() = default; diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h index b4d7229e5..287ab90ca 100644 --- a/src/video_core/renderer_vulkan/vk_image.h +++ b/src/video_core/renderer_vulkan/vk_image.h @@ -17,8 +17,8 @@ class VKScheduler; class VKImage { public: - explicit VKImage(const VKDevice& device, VKScheduler& scheduler, - const VkImageCreateInfo& image_ci, VkImageAspectFlags aspect_mask); + explicit VKImage(const VKDevice& device_, VKScheduler& scheduler_, + const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_); ~VKImage(); /// Records in the passed command buffer an image transition and updates the state of the image. 
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 24c8960ac..be53d450f 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -29,10 +29,10 @@ u64 GetAllocationChunkSize(u64 required_size) { class VKMemoryAllocation final { public: - explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, - VkMemoryPropertyFlags properties, u64 allocation_size, u32 type) - : device{device}, memory{std::move(memory)}, properties{properties}, - allocation_size{allocation_size}, shifted_type{ShiftType(type)} {} + explicit VKMemoryAllocation(const VKDevice& device_, vk::DeviceMemory memory_, + VkMemoryPropertyFlags properties_, u64 allocation_size_, u32 type_) + : device{device_}, memory{std::move(memory_)}, properties{properties_}, + allocation_size{allocation_size_}, shifted_type{ShiftType(type_)} {} VKMemoryCommit Commit(VkDeviceSize commit_size, VkDeviceSize alignment) { auto found = TryFindFreeSection(free_iterator, allocation_size, @@ -117,8 +117,8 @@ private: std::vector commits; }; -VKMemoryManager::VKMemoryManager(const VKDevice& device) - : device{device}, properties{device.GetPhysical().GetMemoryProperties()} {} +VKMemoryManager::VKMemoryManager(const VKDevice& device_) + : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {} VKMemoryManager::~VKMemoryManager() = default; @@ -207,9 +207,9 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requi return {}; } -VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, - const vk::DeviceMemory& memory, u64 begin, u64 end) - : device{device}, memory{memory}, interval{begin, end}, allocation{allocation} {} +VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_, + const vk::DeviceMemory& memory_, u64 begin_, u64 end_) + : 
device{device_}, memory{memory_}, interval{begin_, end_}, allocation{allocation_} {} VKMemoryCommitImpl::~VKMemoryCommitImpl() { allocation->Free(this); diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 1af88e3d4..39f903ec8 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -21,7 +21,7 @@ using VKMemoryCommit = std::unique_ptr; class VKMemoryManager final { public: - explicit VKMemoryManager(const VKDevice& device); + explicit VKMemoryManager(const VKDevice& device_); VKMemoryManager(const VKMemoryManager&) = delete; ~VKMemoryManager(); @@ -58,8 +58,8 @@ class VKMemoryCommitImpl final { friend MemoryMap; public: - explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, - const vk::DeviceMemory& memory, u64 begin, u64 end); + explicit VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_, + const vk::DeviceMemory& memory_, u64 begin_, u64 end_); ~VKMemoryCommitImpl(); /// Maps a memory region and returns a pointer to it. @@ -93,8 +93,8 @@ private: /// Holds ownership of a memory map. 
class MemoryMap final { public: - explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address) - : commit{commit}, address{address} {} + explicit MemoryMap(const VKMemoryCommitImpl* commit_, u8* address_) + : commit{commit_}, address{address_} {} ~MemoryMap() { if (commit) { diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index ee2d871e3..6fa071737 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -66,15 +66,15 @@ void QueryPool::Reserve(std::pair query) { usage[pool_index * GROW_STEP + static_cast(query.second)] = false; } -VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, - const VKDevice& device, VKScheduler& scheduler) - : VideoCommon::QueryCacheBase{rasterizer, maxwell3d, gpu_memory}, - device{device}, scheduler{scheduler}, query_pools{ - QueryPool{device, scheduler, - QueryType::SamplesPassed}, - } {} +VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, + const VKDevice& device_, VKScheduler& scheduler_) + : QueryCacheBase{rasterizer_, maxwell3d_, + gpu_memory_}, + device{device_}, scheduler{scheduler_}, query_pools{ + QueryPool{device_, scheduler_, + QueryType::SamplesPassed}, + } {} VKQueryCache::~VKQueryCache() { // TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class @@ -95,12 +95,12 @@ void VKQueryCache::Reserve(QueryType type, std::pair query) { query_pools[static_cast(type)].Reserve(query); } -HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr dependency, - QueryType type) - : VideoCommon::HostCounterBase{std::move(dependency)}, cache{cache}, - type{type}, query{cache.AllocateQuery(type)}, tick{cache.Scheduler().CurrentTick()} { - const vk::Device* logical = 
&cache.Device().GetLogical(); - cache.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { +HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr dependency_, + QueryType type_) + : HostCounterBase{std::move(dependency_)}, cache{cache_}, + type{type_}, query{cache_.AllocateQuery(type_)}, tick{cache_.Scheduler().CurrentTick()} { + const vk::Device* logical = &cache_.Device().GetLogical(); + cache_.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { logical->ResetQueryPoolEXT(query.first, query.second, 1); cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT); }); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index 2e57fb75d..201fca888 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -53,9 +53,9 @@ private: class VKQueryCache final : public VideoCommon::QueryCacheBase { public: - explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, - const VKDevice& device, VKScheduler& scheduler); + explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, + const VKDevice& device_, VKScheduler& scheduler_); ~VKQueryCache(); std::pair AllocateQuery(VideoCore::QueryType type); @@ -78,8 +78,8 @@ private: class HostCounter final : public VideoCommon::HostCounterBase { public: - explicit HostCounter(VKQueryCache& cache, std::shared_ptr dependency, - VideoCore::QueryType type); + explicit HostCounter(VKQueryCache& cache_, std::shared_ptr dependency_, + VideoCore::QueryType type_); ~HostCounter(); void EndQuery(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e0fb8693f..560386081 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ 
b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -904,15 +904,14 @@ void RasterizerVulkan::SetupShaderDescriptors( texture_cache.GuardSamplers(false); } -void RasterizerVulkan::SetupImageTransitions( - Texceptions texceptions, const std::array& color_attachments, - const View& zeta_attachment) { +void RasterizerVulkan::SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color, + const ZetaAttachment& zeta) { TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT); TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); - for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) { - const auto color_attachment = color_attachments[rt]; + for (std::size_t rt = 0; rt < color.size(); ++rt) { + const auto color_attachment = color[rt]; if (color_attachment == nullptr) { continue; } @@ -923,13 +922,13 @@ void RasterizerVulkan::SetupImageTransitions( VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); } - if (zeta_attachment != nullptr) { + if (zeta != nullptr) { const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - zeta_attachment->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); + zeta->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 237e51fa4..1789fb285 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -160,6 +160,9 @@ private: bool is_indexed = 0; }; + using ColorAttachments = std::array; + using ZetaAttachment = View; + using Texceptions = std::bitset; static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8; @@ -181,9 +184,8 @@ private: /// Setup descriptors in the graphics pipeline. void SetupShaderDescriptors(const std::array& shaders); - void SetupImageTransitions(Texceptions texceptions, - const std::array& color_attachments, - const View& zeta_attachment); + void SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color, + const ZetaAttachment& zeta); void UpdateDynamicStates(); @@ -308,8 +310,8 @@ private: vk::Event wfi_event; VideoCommon::Shader::AsyncShaders async_shaders; - std::array color_attachments; - View zeta_attachment; + ColorAttachments color_attachments; + ZetaAttachment zeta_attachment; std::vector sampled_views; std::vector image_views; diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index 80284cf92..e812c7dd6 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -24,7 +24,7 @@ bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept { return std::memcmp(&rhs, this, sizeof 
*this) == 0; } -VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {} +VKRenderPassCache::VKRenderPassCache(const VKDevice& device_) : device{device_} {} VKRenderPassCache::~VKRenderPassCache() = default; diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h index 8b0fec720..652ecef7b 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h @@ -55,7 +55,7 @@ namespace Vulkan { class VKRenderPassCache final { public: - explicit VKRenderPassCache(const VKDevice& device); + explicit VKRenderPassCache(const VKDevice& device_); ~VKRenderPassCache(); VkRenderPass GetRenderPass(const RenderPassParams& params); diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index b068888f9..b859691fa 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -36,7 +36,7 @@ VkBorderColor ConvertBorderColor(std::array color) { } // Anonymous namespace -VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {} +VKSamplerCache::VKSamplerCache(const VKDevice& device_) : device{device_} {} VKSamplerCache::~VKSamplerCache() = default; diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h index a33d1c0ee..3f22c4610 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h @@ -14,7 +14,7 @@ class VKDevice; class VKSamplerCache final : public VideoCommon::SamplerCache { public: - explicit VKSamplerCache(const VKDevice& device); + explicit VKSamplerCache(const VKDevice& device_); ~VKSamplerCache(); protected: diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 7be8a19f0..6d3a5da0b 100644 --- 
a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -104,7 +104,7 @@ private: template class TypedCommand final : public Command { public: - explicit TypedCommand(T&& command) : command{std::move(command)} {} + explicit TypedCommand(T&& command_) : command{std::move(command_)} {} ~TypedCommand() override = default; TypedCommand(TypedCommand&&) = delete; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index fed9ebecd..7b0169acd 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -55,8 +55,8 @@ enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; class Expression final { public: - Expression(Id id, Type type) : id{id}, type{type} { - ASSERT(type != Type::Void); + Expression(Id id_, Type type_) : id{id_}, type{type_} { + ASSERT(type_ != Type::Void); } Expression() : type{Type::Void} {} @@ -281,12 +281,12 @@ u32 ShaderVersion(const VKDevice& device) { class SPIRVDecompiler final : public Sirit::Module { public: - explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, - const Registry& registry, const Specialization& specialization) - : Module(ShaderVersion(device)), device{device}, ir{ir}, stage{stage}, - header{ir.GetHeader()}, registry{registry}, specialization{specialization} { - if (stage != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); + explicit SPIRVDecompiler(const VKDevice& device_, const ShaderIR& ir_, ShaderType stage_, + const Registry& registry_, const Specialization& specialization_) + : Module(ShaderVersion(device_)), device{device_}, ir{ir_}, stage{stage_}, + header{ir_.GetHeader()}, registry{registry_}, specialization{specialization_} { + if (stage_ != ShaderType::Compute) { + transform_feedback = 
BuildTransformFeedback(registry_.GetGraphicsInfo()); } AddCapability(spv::Capability::Shader); @@ -330,7 +330,7 @@ public: if (device.IsFloat16Supported()) { AddCapability(spv::Capability::Float16); } - t_scalar_half = Name(TypeFloat(device.IsFloat16Supported() ? 16 : 32), "scalar_half"); + t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 16 : 32), "scalar_half"); t_half = Name(TypeVector(t_scalar_half, 2), "half"); const Id main = Decompile(); @@ -1088,9 +1088,9 @@ private: indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size"); } - const auto& output_attributes = ir.GetOutputAttributes(); - const bool declare_clip_distances = - std::any_of(output_attributes.begin(), output_attributes.end(), [](const auto& index) { + const auto& ir_output_attributes = ir.GetOutputAttributes(); + const bool declare_clip_distances = std::any_of( + ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) { return index == Attribute::Index::ClipDistances0123 || index == Attribute::Index::ClipDistances4567; }); @@ -2891,7 +2891,7 @@ private: class ExprDecompiler { public: - explicit ExprDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} + explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} Id operator()(const ExprAnd& expr) { const Id type_def = decomp.GetTypeDefinition(Type::Bool); @@ -2947,7 +2947,7 @@ private: class ASTDecompiler { public: - explicit ASTDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} + explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} void operator()(const ASTProgram& ast) { ASTNode current = ast.nodes.GetFirst(); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 110848922..df1812514 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -30,8 +30,8 @@ constexpr u32 DESCRIPTOR_SET = 0; class 
ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { public: - explicit constexpr ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry, u32 index) - : VideoCommon::Shader::ConstBuffer{entry}, index{index} {} + explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_) + : ConstBuffer{entry_}, index{index_} {} constexpr u32 GetIndex() const { return index; @@ -43,8 +43,8 @@ private: class GlobalBufferEntry { public: - constexpr explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset, bool is_written) - : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_written{is_written} {} + constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_) + : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {} constexpr u32 GetCbufIndex() const { return cbuf_index; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index f2c8f2ae1..64649699f 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -180,19 +180,19 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP return ci; } -u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) { +u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, + SwizzleSource w_source) { return (static_cast(x_source) << 24) | (static_cast(y_source) << 16) | (static_cast(z_source) << 8) | static_cast(w_source); } } // Anonymous namespace -CachedSurface::CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool, - GPUVAddr gpu_addr, const SurfaceParams& params) - : SurfaceBase{gpu_addr, params, device.IsOptimalAstcSupported()}, device{device}, - 
memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} { +CachedSurface::CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, + VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, + GPUVAddr gpu_addr_, const SurfaceParams& params_) + : SurfaceBase{gpu_addr_, params_, device_.IsOptimalAstcSupported()}, device{device_}, + memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{staging_pool_} { if (params.IsBuffer()) { buffer = CreateBuffer(device, params, host_memory_size); commit = memory_manager.Commit(buffer, false); @@ -234,7 +234,7 @@ void CachedSurface::UploadTexture(const std::vector& staging_buffer) { void CachedSurface::DownloadTexture(std::vector& staging_buffer) { UNIMPLEMENTED_IF(params.IsBuffer()); - if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) { + if (params.pixel_format == PixelFormat::A1B5G5R5_UNORM) { LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); } @@ -244,10 +244,10 @@ void CachedSurface::DownloadTexture(std::vector& staging_buffer) { FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); + const auto& unused_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); // TODO(Rodrigo): Do this in a single copy for (u32 level = 0; level < params.num_levels; ++level) { - scheduler.Record([image = *image->GetHandle(), buffer = *buffer.handle, + scheduler.Record([image = *image->GetHandle(), buffer = *unused_buffer.handle, copy = GetBufferImageCopy(level)](vk::CommandBuffer cmdbuf) { cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy); }); @@ -255,16 +255,17 @@ void CachedSurface::DownloadTexture(std::vector& staging_buffer) { scheduler.Finish(); // TODO(Rodrigo): Use an intern buffer for staging buffers and avoid this unnecessary memcpy. 
- std::memcpy(staging_buffer.data(), buffer.commit->Map(host_memory_size), host_memory_size); + std::memcpy(staging_buffer.data(), unused_buffer.commit->Map(host_memory_size), + host_memory_size); } void CachedSurface::DecorateSurfaceName() { // TODO(Rodrigo): Add name decorations } -View CachedSurface::CreateView(const ViewParams& params) { +View CachedSurface::CreateView(const ViewParams& view_params) { // TODO(Rodrigo): Add name decorations - return views[params] = std::make_shared(device, *this, params); + return views[view_params] = std::make_shared(device, *this, view_params); } void CachedSurface::UploadBuffer(const std::vector& staging_buffer) { @@ -348,21 +349,21 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { static_cast(params.GetNumLayers())}; } -CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface, - const ViewParams& params) - : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()}, - image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()}, - aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, - base_level{params.base_level}, num_levels{params.num_levels}, - image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} { +CachedSurfaceView::CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_, + const ViewParams& view_params_) + : ViewBase{view_params_}, surface_params{surface_.GetSurfaceParams()}, + image{surface_.GetImageHandle()}, buffer_view{surface_.GetBufferViewHandle()}, + aspect_mask{surface_.GetAspectMask()}, device{device_}, surface{surface_}, + base_level{view_params_.base_level}, num_levels{view_params_.num_levels}, + image_view_type{image ? 
GetImageViewType(view_params_.target) : VK_IMAGE_VIEW_TYPE_1D} { if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { base_layer = 0; num_layers = 1; - base_slice = params.base_layer; - num_slices = params.num_layers; + base_slice = view_params_.base_layer; + num_slices = view_params_.num_layers; } else { - base_layer = params.base_layer; - num_layers = params.num_layers; + base_layer = view_params_.base_layer; + num_layers = view_params_.num_layers; } } @@ -384,7 +385,7 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)}; - if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) { + if (surface_params.pixel_format == PixelFormat::A1B5G5R5_UNORM) { // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. std::swap(swizzle[0], swizzle[2]); } @@ -395,12 +396,12 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc if (aspect == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); const bool is_first = x_source == SwizzleSource::R; - switch (params.pixel_format) { - case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT: - case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT: + switch (surface_params.pixel_format) { + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; break; - case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM: + case PixelFormat::S8_UINT_D24_UNORM: aspect = is_first ? 
VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; break; default: @@ -417,7 +418,7 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { ASSERT(base_slice == 0); - ASSERT(num_slices == params.depth); + ASSERT(num_slices == surface_params.depth); } image_view = device.GetLogical().CreateImageView({ diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 39202feba..06880f228 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -40,9 +40,9 @@ class CachedSurface final : public VideoCommon::SurfaceBase { friend CachedSurfaceView; public: - explicit CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool, - GPUVAddr gpu_addr, const SurfaceParams& params); + explicit CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, + VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, + GPUVAddr gpu_addr_, const SurfaceParams& params_); ~CachedSurface(); void UploadTexture(const std::vector& staging_buffer) override; @@ -84,7 +84,7 @@ public: protected: void DecorateSurfaceName(); - View CreateView(const ViewParams& params) override; + View CreateView(const ViewParams& view_params) override; private: void UploadBuffer(const std::vector& staging_buffer); @@ -110,8 +110,8 @@ private: class CachedSurfaceView final : public VideoCommon::ViewBase { public: - explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface, - const ViewParams& params); + explicit CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_, + const ViewParams& view_params_); ~CachedSurfaceView(); VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source, @@ -126,11 +126,11 @@ public: } u32 GetWidth() const { - return params.GetMipWidth(base_level); + return 
surface_params.GetMipWidth(base_level); } u32 GetHeight() const { - return params.GetMipHeight(base_level); + return surface_params.GetMipHeight(base_level); } u32 GetNumLayers() const { @@ -169,7 +169,7 @@ public: private: // Store a copy of these values to avoid double dereference when reading them - const SurfaceParams params; + const SurfaceParams surface_params; const VkImage image; const VkBufferView buffer_view; const VkImageAspectFlags aspect_mask; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 351c048d2..8826da325 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -14,8 +14,8 @@ namespace Vulkan { -VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler) - : device{device}, scheduler{scheduler} {} +VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_) + : device{device_}, scheduler{scheduler_} {} VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 945320c72..f7e3c9821 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -31,7 +31,7 @@ struct DescriptorUpdateEntry { class VKUpdateDescriptorQueue final { public: - explicit VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler); + explicit VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_); ~VKUpdateDescriptorQueue(); void TickFrame(); diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 4e83303d8..1eced809e 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -417,7 +417,7 @@ VkResult Free(VkDevice 
device, VkCommandPool handle, Span buffe } Instance Instance::Create(u32 version, Span layers, Span extensions, - InstanceDispatch& dld) noexcept { + InstanceDispatch& dispatch) noexcept { const VkApplicationInfo application_info{ .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .pNext = nullptr, @@ -439,17 +439,17 @@ Instance Instance::Create(u32 version, Span layers, Span> Instance::EnumeratePhysicalDevices() { @@ -540,7 +540,7 @@ std::vector SwapchainKHR::GetImages() const { Device Device::Create(VkPhysicalDevice physical_device, Span queues_ci, Span enabled_extensions, const void* next, - DeviceDispatch& dld) noexcept { + DeviceDispatch& dispatch) noexcept { const VkDeviceCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pNext = next, @@ -555,11 +555,11 @@ Device Device::Create(VkPhysicalDevice physical_device, Span allocations, std::size_t num, - VkDevice device, PoolType pool, const DeviceDispatch& dld) noexcept - : allocations{std::move(allocations)}, num{num}, device{device}, pool{pool}, dld{&dld} {} + explicit PoolAllocations(std::unique_ptr allocations_, std::size_t num_, + VkDevice device_, PoolType pool_, const DeviceDispatch& dld_) noexcept + : allocations{std::move(allocations_)}, num{num_}, device{device_}, pool{pool_}, + dld{&dld_} {} /// Copying Vulkan allocations is not supported and will never be. PoolAllocations(const PoolAllocations&) = delete; @@ -565,7 +566,7 @@ class Instance : public Handle { public: /// Creates a Vulkan instance. Use "operator bool" for error handling. static Instance Create(u32 version, Span layers, Span extensions, - InstanceDispatch& dld) noexcept; + InstanceDispatch& dispatch) noexcept; /// Enumerates physical devices. /// @return Physical devices and an empty handle on failure. @@ -581,7 +582,8 @@ public: constexpr Queue() noexcept = default; /// Construct a queue handle. 
- constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {} + constexpr Queue(VkQueue queue_, const DeviceDispatch& dld_) noexcept + : queue{queue_}, dld{&dld_} {} VkResult Submit(Span submit_infos, VkFence fence = VK_NULL_HANDLE) const noexcept { @@ -720,7 +722,7 @@ class Device : public Handle { public: static Device Create(VkPhysicalDevice physical_device, Span queues_ci, Span enabled_extensions, const void* next, - DeviceDispatch& dld) noexcept; + DeviceDispatch& dispatch) noexcept; Queue GetQueue(u32 family_index) const noexcept; @@ -809,8 +811,9 @@ class PhysicalDevice { public: constexpr PhysicalDevice() noexcept = default; - constexpr PhysicalDevice(VkPhysicalDevice physical_device, const InstanceDispatch& dld) noexcept - : physical_device{physical_device}, dld{&dld} {} + constexpr PhysicalDevice(VkPhysicalDevice physical_device_, + const InstanceDispatch& dld_) noexcept + : physical_device{physical_device_}, dld{&dld_} {} constexpr operator VkPhysicalDevice() const noexcept { return physical_device; @@ -849,8 +852,8 @@ class CommandBuffer { public: CommandBuffer() noexcept = default; - explicit CommandBuffer(VkCommandBuffer handle, const DeviceDispatch& dld) noexcept - : handle{handle}, dld{&dld} {} + explicit CommandBuffer(VkCommandBuffer handle_, const DeviceDispatch& dld_) noexcept + : handle{handle_}, dld{&dld_} {} const VkCommandBuffer* address() const noexcept { return &handle; diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 4c8971615..d656e0668 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -241,10 +241,10 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) ParseInfo parse_info{}; SingleBranch single_branch{}; - const auto insert_label = [](CFGRebuildState& state, u32 address) { - const auto pair = state.labels.emplace(address); + const auto insert_label = [](CFGRebuildState& rebuild_state, u32 
label_address) { + const auto pair = rebuild_state.labels.emplace(label_address); if (pair.second) { - state.inspect_queries.push_back(address); + rebuild_state.inspect_queries.push_back(label_address); } }; diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 1ed4212ee..532f66d27 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -358,9 +358,9 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { instr.suldst.GetStoreDataLayout() != StoreType::Bits64); auto descriptor = [this, instr] { - std::optional descriptor; + std::optional sampler_descriptor; if (instr.suldst.is_immediate) { - descriptor = + sampler_descriptor = registry.ObtainBoundSampler(static_cast(instr.image.index.Value())); } else { const Node image_register = GetRegister(instr.gpr39); @@ -368,12 +368,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { static_cast(global_code.size())); const auto buffer = std::get<1>(result); const auto offset = std::get<2>(result); - descriptor = registry.ObtainBindlessSampler(buffer, offset); + sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset); } - if (!descriptor) { + if (!sampler_descriptor) { UNREACHABLE_MSG("Failed to obtain image descriptor"); } - return *descriptor; + return *sampler_descriptor; }(); const auto comp_mask = GetImageComponentMask(descriptor.format); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 29a7cfbfe..1db500bc4 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -90,11 +90,11 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); return Immediate(0U); case SystemVariable::Tid: { - Node value = Immediate(0); - value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9); - value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdY), 16, 
9); - value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdZ), 26, 5); - return value; + Node val = Immediate(0); + val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9); + val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9); + val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5); + return val; } case SystemVariable::TidX: return Operation(OperationCode::LocalInvocationIdX); diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index b44c09d71..42a1c0c6f 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -167,27 +167,28 @@ std::vector SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams return result; } -void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, - u8* buffer, u32 level) { - const u32 width{params.GetMipWidth(level)}; - const u32 height{params.GetMipHeight(level)}; - const u32 block_height{params.GetMipBlockHeight(level)}; - const u32 block_depth{params.GetMipBlockDepth(level)}; +void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, + const SurfaceParams& surface_params, u8* buffer, u32 level) { + const u32 width{surface_params.GetMipWidth(level)}; + const u32 height{surface_params.GetMipHeight(level)}; + const u32 block_height{surface_params.GetMipBlockHeight(level)}; + const u32 block_depth{surface_params.GetMipBlockDepth(level)}; std::size_t guest_offset{mipmap_offsets[level]}; - if (params.is_layered) { + if (surface_params.is_layered) { std::size_t host_offset = 0; const std::size_t guest_stride = layer_size; - const std::size_t host_stride = params.GetHostLayerSize(level); - for (u32 layer = 0; layer < params.depth; ++layer) { - MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1, - params.tile_width_spacing, buffer + host_offset, memory + 
guest_offset); + const std::size_t host_stride = surface_params.GetHostLayerSize(level); + for (u32 layer = 0; layer < surface_params.depth; ++layer) { + MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, + block_depth, 1, surface_params.tile_width_spacing, buffer + host_offset, + memory + guest_offset); guest_offset += guest_stride; host_offset += host_stride; } } else { - MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, - params.GetMipDepth(level), params.tile_width_spacing, buffer, + MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, block_depth, + surface_params.GetMipDepth(level), surface_params.tile_width_spacing, buffer, memory + guest_offset); } } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 173f2edba..cfcfa5b3a 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -167,8 +167,8 @@ protected: std::vector mipmap_offsets; private: - void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, - u32 level); + void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& surface_params, + u8* buffer, u32 level); std::vector BreakDownLayered(const SurfaceParams& in_params) const; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 13dd16356..305297719 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -356,18 +356,18 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const { - const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; - const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), 
GetDefaultBlockHeight())}; - const u32 depth{is_layered ? 1U : GetMipDepth(level)}; + const u32 mip_width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; + const u32 mip_height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; + const u32 mip_depth{is_layered ? 1U : GetMipDepth(level)}; if (is_tiled) { - return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, - depth, GetMipBlockHeight(level), + return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), mip_width, + mip_height, mip_depth, GetMipBlockHeight(level), GetMipBlockDepth(level)); } else if (as_host_size || IsBuffer()) { - return GetBytesPerPixel() * width * height * depth; + return GetBytesPerPixel() * mip_width * mip_height * mip_depth; } else { // Linear Texture Case - return pitch * height * depth; + return pitch * mip_height * mip_depth; } } -- cgit v1.2.3 From f95602f15207851b849c57e2a2dd313a087b2493 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 5 Dec 2020 11:40:14 -0500 Subject: video_core: Resolve more variable shadowing scenarios pt.3 Cleans out the rest of the occurrences of variable shadowing and makes any further occurrences of shadowing compiler errors. 
--- src/video_core/CMakeLists.txt | 9 +- src/video_core/query_cache.h | 8 +- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 32 +++--- src/video_core/renderer_opengl/gl_buffer_cache.h | 14 +-- src/video_core/renderer_opengl/gl_query_cache.cpp | 23 +++-- src/video_core/renderer_opengl/gl_query_cache.h | 10 +- src/video_core/renderer_opengl/gl_rasterizer.cpp | 12 +-- src/video_core/renderer_opengl/gl_rasterizer.h | 8 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 7 +- src/video_core/renderer_opengl/gl_shader_cache.h | 9 +- .../renderer_opengl/gl_shader_decompiler.h | 4 +- .../renderer_opengl/gl_texture_cache.cpp | 18 ++-- src/video_core/renderer_opengl/gl_texture_cache.h | 6 +- src/video_core/renderer_opengl/renderer_opengl.cpp | 4 +- src/video_core/renderer_opengl/renderer_opengl.h | 8 +- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 110 ++++++++++----------- src/video_core/renderer_vulkan/vk_buffer_cache.h | 16 +-- src/video_core/renderer_vulkan/vk_command_pool.cpp | 4 +- src/video_core/renderer_vulkan/vk_command_pool.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 19 ++-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 20 ++-- src/video_core/renderer_vulkan/vk_query_cache.cpp | 22 ++--- src/video_core/renderer_vulkan/vk_query_cache.h | 4 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 14 +-- src/video_core/renderer_vulkan/vk_rasterizer.h | 10 +- .../renderer_vulkan/vk_texture_cache.cpp | 8 +- src/video_core/renderer_vulkan/vk_texture_cache.h | 9 +- src/video_core/shader/ast.cpp | 4 +- src/video_core/shader/ast.h | 31 +++--- src/video_core/shader/async_shaders.cpp | 2 +- src/video_core/shader/async_shaders.h | 2 +- src/video_core/shader/control_flow.cpp | 4 +- src/video_core/shader/control_flow.h | 14 +-- src/video_core/shader/decode.cpp | 2 +- .../shader/decode/arithmetic_integer.cpp | 7 +- src/video_core/shader/expr.h | 6 +- src/video_core/shader/node.h | 16 +-- src/video_core/shader/shader_ir.cpp | 7 +- 
src/video_core/shader/shader_ir.h | 8 +- src/video_core/texture_cache/copy_params.h | 18 ++-- .../texture_cache/format_lookup_table.cpp | 12 +-- src/video_core/texture_cache/surface_base.cpp | 8 +- src/video_core/texture_cache/surface_base.h | 10 +- src/video_core/texture_cache/surface_view.h | 10 +- src/video_core/textures/texture.h | 2 +- 45 files changed, 293 insertions(+), 280 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index abcee2a1c..a021d61f5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -297,13 +297,20 @@ if (ENABLE_NSIGHT_AFTERMATH) endif() if (MSVC) - target_compile_options(video_core PRIVATE /we4267) + target_compile_options(video_core PRIVATE + /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data + /we4456 # Declaration of 'identifier' hides previous local declaration + /we4457 # Declaration of 'identifier' hides function parameter + /we4458 # Declaration of 'identifier' hides class member + /we4459 # Declaration of 'identifier' hides global declaration + ) else() target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion -Werror=pessimizing-move -Werror=redundant-move + -Werror=shadow -Werror=switch -Werror=type-limits -Werror=unused-variable diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index fc54ca0ef..203f2af05 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -28,8 +28,8 @@ namespace VideoCommon { template class CounterStreamBase { public: - explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type) - : cache{cache}, type{type} {} + explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_) + : cache{cache_}, type{type_} {} /// Updates the state of the stream, enabling or disabling as needed. 
void Update(bool enabled) { @@ -334,8 +334,8 @@ private: template class CachedQueryBase { public: - explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr) - : cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + explicit CachedQueryBase(VAddr cpu_addr_, u8* host_ptr_) + : cpu_addr{cpu_addr_}, host_ptr{host_ptr_} {} virtual ~CachedQueryBase() = default; CachedQueryBase(CachedQueryBase&&) noexcept = default; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index b1c4cd62f..60735d502 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -22,11 +22,11 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); -Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) - : VideoCommon::BufferBlock{cpu_addr, size} { +Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_) + : BufferBlock{cpu_addr_, size_} { gl_buffer.Create(); - glNamedBufferData(gl_buffer.handle, static_cast(size), nullptr, GL_DYNAMIC_DRAW); - if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { + glNamedBufferData(gl_buffer.handle, static_cast(size_), nullptr, GL_DYNAMIC_DRAW); + if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) { glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); } @@ -34,14 +34,14 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) Buffer::~Buffer() = default; -void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { - glNamedBufferSubData(Handle(), static_cast(offset), static_cast(size), - data); +void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { + glNamedBufferSubData(Handle(), static_cast(offset), + 
static_cast(data_size), data); } -void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { +void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { MICROPROFILE_SCOPE(OpenGL_Buffer_Download); - const GLsizeiptr gl_size = static_cast(size); + const GLsizeiptr gl_size = static_cast(data_size); const GLintptr gl_offset = static_cast(offset); if (read_buffer.handle == 0) { read_buffer.Create(); @@ -54,16 +54,16 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { } void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t size) { + std::size_t copy_size) { glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast(src_offset), - static_cast(dst_offset), static_cast(size)); + static_cast(dst_offset), static_cast(copy_size)); } -OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, - const Device& device_, std::size_t stream_size) - : GenericBufferCache{rasterizer, gpu_memory, cpu_memory, - std::make_unique(device_, stream_size, true)}, +OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, + const Device& device_, std::size_t stream_size_) + : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, + std::make_unique(device_, stream_size_, true)}, device{device_} { if (!device.HasFastBufferSubData()) { return; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index f75b32e31..95251e26b 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -25,15 +25,15 @@ class RasterizerOpenGL; class Buffer : public VideoCommon::BufferBlock { public: - explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); + explicit Buffer(const Device& device_, VAddr 
cpu_addr_, std::size_t size_); ~Buffer(); - void Upload(std::size_t offset, std::size_t size, const u8* data); + void Upload(std::size_t offset, std::size_t data_size, const u8* data); - void Download(std::size_t offset, std::size_t size, u8* data); + void Download(std::size_t offset, std::size_t data_size, u8* data); void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t size); + std::size_t copy_size); GLuint Handle() const noexcept { return gl_buffer.handle; @@ -52,9 +52,9 @@ private: using GenericBufferCache = VideoCommon::BufferCache; class OGLBufferCache final : public GenericBufferCache { public: - explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, - const Device& device, std::size_t stream_size); + explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, + const Device& device_, std::size_t stream_size_); ~OGLBufferCache(); BufferInfo GetEmptyBuffer(std::size_t) override; diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index bcc37471f..acebbf5f4 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -30,11 +30,9 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { } // Anonymous namespace -QueryCache::QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::MemoryManager& gpu_memory) - : VideoCommon::QueryCacheBase( - rasterizer, maxwell3d, gpu_memory), - gl_rasterizer{rasterizer} {} +QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_) + : QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {} QueryCache::~QueryCache() = default; @@ -59,10 +57,11 @@ bool 
QueryCache::AnyCommandQueued() const noexcept { return gl_rasterizer.AnyCommandQueued(); } -HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr dependency, +HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr dependency_, VideoCore::QueryType type_) - : HostCounterBase{std::move(dependency)}, cache{cache_}, type{type_}, - query{cache.AllocateQuery(type)} { + : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, query{ + cache.AllocateQuery( + type)} { glBeginQuery(GetTarget(type), query.handle); } @@ -86,14 +85,14 @@ u64 HostCounter::BlockingQuery() const { return static_cast(value); } -CachedQuery::CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr, - u8* host_ptr) - : CachedQueryBase{cpu_addr, host_ptr}, cache{&cache_}, type{type_} {} +CachedQuery::CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_, + u8* host_ptr_) + : CachedQueryBase{cpu_addr_, host_ptr_}, cache{&cache_}, type{type_} {} CachedQuery::~CachedQuery() = default; CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept - : CachedQueryBase(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} + : CachedQueryBase(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { cache = rhs.cache; diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index d9851e880..7bbe5cfe9 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -29,8 +29,8 @@ using CounterStream = VideoCommon::CounterStreamBase; class QueryCache final : public VideoCommon::QueryCacheBase { public: - explicit QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::MemoryManager& gpu_memory); + explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_); ~QueryCache(); OGLQuery 
AllocateQuery(VideoCore::QueryType type); @@ -46,7 +46,7 @@ private: class HostCounter final : public VideoCommon::HostCounterBase { public: - explicit HostCounter(QueryCache& cache_, std::shared_ptr dependency, + explicit HostCounter(QueryCache& cache_, std::shared_ptr dependency_, VideoCore::QueryType type_); ~HostCounter(); @@ -62,8 +62,8 @@ private: class CachedQuery final : public VideoCommon::CachedQueryBase { public: - explicit CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr, - u8* host_ptr); + explicit CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_, + u8* host_ptr_); ~CachedQuery() override; CachedQuery(CachedQuery&& rhs) noexcept; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index cfddbde5d..8572af5a5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -149,19 +149,19 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss } // Anonymous namespace -RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_, - Core::Memory::Memory& cpu_memory, const Device& device_, +RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, + Core::Memory::Memory& cpu_memory_, const Device& device_, ScreenInfo& screen_info_, ProgramManager& program_manager_, StateTracker& state_tracker_) - : RasterizerAccelerated{cpu_memory}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()), + : RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker), - shader_cache(*this, emu_window, gpu, maxwell3d, kepler_compute, gpu_memory, device), + shader_cache(*this, 
emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), query_cache(*this, maxwell3d, gpu_memory), - buffer_cache(*this, gpu_memory, cpu_memory, device, STREAM_BUFFER_SIZE), + buffer_cache(*this, gpu_memory, cpu_memory_, device, STREAM_BUFFER_SIZE), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), - async_shaders(emu_window) { + async_shaders(emu_window_) { CheckExtensions(); unified_uniform_buffer.Create(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 1d0f585fa..de28cff15 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -62,10 +62,10 @@ static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128); class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { public: - explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, - Core::Memory::Memory& cpu_memory, const Device& device, - ScreenInfo& screen_info, ProgramManager& program_manager, - StateTracker& state_tracker); + explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, + Core::Memory::Memory& cpu_memory_, const Device& device_, + ScreenInfo& screen_info_, ProgramManager& program_manager_, + StateTracker& state_tracker_); ~RasterizerOpenGL() override; void Draw(bool is_indexed, bool is_instanced) override; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9f2c0a222..0b96481f5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -318,14 +318,13 @@ std::unique_ptr Shader::CreateFromCache(const ShaderParameters& params, precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); } -ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, +ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, 
Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_) - : VideoCommon::ShaderCache{rasterizer}, emu_window{emu_window_}, gpu{gpu_}, - gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, device{device_} {} + : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_}, + maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {} ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index ab5374fac..2aed0697e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -119,10 +119,11 @@ private: class ShaderCacheOpenGL final : public VideoCommon::ShaderCache { public: - explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::Frontend::EmuWindow& emu_window, - Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, const Device& device); + explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, + Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_); ~ShaderCacheOpenGL() override; /// Loads disk cache for the current game diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index f5a5249f2..c4ff47875 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -25,8 +25,8 @@ using ImageEntry = VideoCommon::Shader::Image; class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { public: - 
explicit ConstBufferEntry(u32 max_offset, bool is_indirect, u32 index_) - : ConstBuffer{max_offset, is_indirect}, index{index_} {} + explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_) + : ConstBuffer{max_offset_, is_indirect_}, index{index_} {} u32 GetIndex() const { return index; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a59fe853e..f19ef2173 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -347,14 +347,14 @@ void CachedSurface::UploadTextureMipmap(u32 level, const std::vector& stagin internal_format, image_size, buffer); break; case SurfaceTarget::TextureCubemap: { - const std::size_t layer_size{params.GetHostLayerSize(level)}; + const std::size_t host_layer_size{params.GetHostLayerSize(level)}; for (std::size_t face = 0; face < params.depth; ++face) { glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), static_cast(params.GetMipWidth(level)), static_cast(params.GetMipHeight(level)), 1, - internal_format, static_cast(layer_size), - buffer); - buffer += layer_size; + internal_format, + static_cast(host_layer_size), buffer); + buffer += host_layer_size; } break; } @@ -532,12 +532,12 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { return texture_view; } -TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::MemoryManager& gpu_memory, const Device& device, +TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, StateTracker& state_tracker_) - : TextureCacheBase{rasterizer, maxwell3d, gpu_memory, device.HasASTC()}, state_tracker{ - state_tracker_} { + : TextureCacheBase{rasterizer_, maxwell3d_, gpu_memory_, device_.HasASTC()}, + 
state_tracker{state_tracker_} { src_framebuffer.Create(); dst_framebuffer.Create(); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 76a7b2316..72b284fab 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -130,9 +130,9 @@ private: class TextureCacheOpenGL final : public TextureCacheBase { public: - explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::MemoryManager& gpu_memory, const Device& device, + explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, StateTracker& state_tracker); ~TextureCacheOpenGL(); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1523cd6fa..521b03ba2 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -130,8 +130,8 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window_, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, - std::unique_ptr context) - : RendererBase{emu_window_, std::move(context)}, telemetry_session{telemetry_session_}, + std::unique_ptr context_) + : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {} RendererOpenGL::~RendererOpenGL() = default; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 9ef181f95..376f88766 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ 
b/src/video_core/renderer_opengl/renderer_opengl.h @@ -57,10 +57,10 @@ struct ScreenInfo { class RendererOpenGL final : public VideoCore::RendererBase { public: - explicit RendererOpenGL(Core::TelemetrySession& telemetry_session, - Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, - Tegra::GPU& gpu, - std::unique_ptr context); + explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_, + Core::Frontend::EmuWindow& emu_window_, + Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, + std::unique_ptr context_); ~RendererOpenGL() override; bool Init() override; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index d9d3da9ea..444d3fb93 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -38,13 +38,13 @@ std::unique_ptr CreateStreamBuffer(const VKDevice& device, VKSch } // Anonymous namespace Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, - VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size) - : BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { + VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) + : BufferBlock{cpu_addr_, size_}, scheduler{scheduler_}, staging_pool{staging_pool_} { const VkBufferCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .size = static_cast(size), + .size = static_cast(size_), .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, @@ -57,69 +57,71 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKSchedu Buffer::~Buffer() = default; -void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { - const auto& staging = staging_pool.GetUnusedBuffer(size, true); - 
std::memcpy(staging.commit->Map(size), data, size); +void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { + const auto& staging = staging_pool.GetUnusedBuffer(data_size, true); + std::memcpy(staging.commit->Map(data_size), data, data_size); scheduler.RequestOutsideRenderPassOperationContext(); const VkBuffer handle = Handle(); - scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size}); - - const VkBufferMemoryBarrier barrier{ - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = UPLOAD_ACCESS_BARRIERS, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = handle, - .offset = offset, - .size = size, - }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, - barrier, {}); - }); + scheduler.Record( + [staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); + + const VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = UPLOAD_ACCESS_BARRIERS, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = data_size, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, + barrier, {}); + }); } -void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { - const auto& staging = staging_pool.GetUnusedBuffer(size, true); +void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { + const auto& staging = staging_pool.GetUnusedBuffer(data_size, true); 
scheduler.RequestOutsideRenderPassOperationContext(); const VkBuffer handle = Handle(); - scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { - const VkBufferMemoryBarrier barrier{ - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = handle, - .offset = offset, - .size = size, - }; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); - cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size}); - }); + scheduler.Record( + [staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) { + const VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = data_size, + }; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); + cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size}); + }); scheduler.Finish(); - std::memcpy(data, staging.commit->Map(size), size); + std::memcpy(data, staging.commit->Map(data_size), data_size); } void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t size) { + std::size_t copy_size) { scheduler.RequestOutsideRenderPassOperationContext(); const VkBuffer dst_buffer = Handle(); scheduler.Record([src_buffer = 
src.Handle(), dst_buffer, src_offset, dst_offset, - size](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); + copy_size](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size}); std::array barriers; barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; @@ -130,7 +132,7 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barriers[0].buffer = src_buffer; barriers[0].offset = src_offset; - barriers[0].size = size; + barriers[0].size = copy_size; barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barriers[1].pNext = nullptr; barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; @@ -139,19 +141,17 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barriers[1].buffer = dst_buffer; barriers[1].offset = dst_offset; - barriers[1].size = size; + barriers[1].size = copy_size; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, barriers, {}); }); } -VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, +VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, const VKDevice& device_, VKMemoryManager& memory_manager_, VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_) - : VideoCommon::BufferCache{rasterizer, gpu_memory, cpu_memory, - CreateStreamBuffer(device_, - scheduler_)}, + : BufferCache{rasterizer_, gpu_memory_, cpu_memory_, CreateStreamBuffer(device_, scheduler_)}, device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ staging_pool_} {} diff --git 
a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 7fb5ceedf..6008b8373 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -22,15 +22,15 @@ class VKScheduler; class Buffer final : public VideoCommon::BufferBlock { public: explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, - VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size); + VKStagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_); ~Buffer(); - void Upload(std::size_t offset, std::size_t size, const u8* data); + void Upload(std::size_t offset, std::size_t data_size, const u8* data); - void Download(std::size_t offset, std::size_t size, u8* data); + void Download(std::size_t offset, std::size_t data_size, u8* data); void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t size); + std::size_t copy_size); VkBuffer Handle() const { return *buffer.handle; @@ -49,10 +49,10 @@ private: class VKBufferCache final : public VideoCommon::BufferCache { public: - explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, - const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool); + explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, + const VKDevice& device_, VKMemoryManager& memory_manager_, + VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_); ~VKBufferCache(); BufferInfo GetEmptyBuffer(std::size_t size) override; diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp index 256a39148..8f7d6410e 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.cpp +++ 
b/src/video_core/renderer_vulkan/vk_command_pool.cpp @@ -17,8 +17,8 @@ struct CommandPool::Pool { vk::CommandBuffers cmdbufs; }; -CommandPool::CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device_) - : ResourcePool(master_semaphore, COMMAND_BUFFER_POOL_SIZE), device{device_} {} +CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_) + : ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), device{device_} {} CommandPool::~CommandPool() = default; diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h index 33655eca4..62a7ce3f1 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.h +++ b/src/video_core/renderer_vulkan/vk_command_pool.h @@ -17,7 +17,7 @@ class VKDevice; class CommandPool final : public ResourcePool { public: - explicit CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device_); + explicit CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_); ~CommandPool() override; void Allocate(size_t begin, size_t end) override; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index df7e8c864..39e58a56f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -136,26 +136,25 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine, Tegra::Engines::ShaderType stage, - GPUVAddr gpu_addr_, VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code_, - u32 main_offset) - : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage, engine), - shader_ir(program_code, main_offset, compiler_settings, registry), +Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, + GPUVAddr gpu_addr_, 
VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) + : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), + shader_ir(program_code, main_offset_, compiler_settings, registry), entries(GenerateShaderEntries(shader_ir)) {} Shader::~Shader() = default; -VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu_, +VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, VKRenderPassCache& renderpass_cache_) - : VideoCommon::ShaderCache{rasterizer}, gpu{gpu_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, - update_descriptor_queue{update_descriptor_queue_}, renderpass_cache{renderpass_cache_} {} + : ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, + gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, + descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, + renderpass_cache{renderpass_cache_} {} VKPipelineCache::~VKPipelineCache() = default; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e558e6658..9e1f8fcbb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -84,9 +84,9 @@ namespace Vulkan { class Shader { public: - explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine, - Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, VAddr cpu_addr, - VideoCommon::Shader::ProgramCode program_code, u32 main_offset); + explicit Shader(Tegra::Engines::ConstBufferEngineInterface& 
engine_, + Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, + VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); ~Shader(); GPUVAddr GetGpuAddr() const { @@ -119,13 +119,13 @@ private: class VKPipelineCache final : public VideoCommon::ShaderCache { public: - explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, const VKDevice& device, - VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - VKRenderPassCache& renderpass_cache); + explicit VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, + VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, + VKRenderPassCache& renderpass_cache_); ~VKPipelineCache() override; std::array GetShaders(); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 6fa071737..038760de3 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -69,12 +69,10 @@ void QueryPool::Reserve(std::pair query) { VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, VKScheduler& scheduler_) - : QueryCacheBase{rasterizer_, maxwell3d_, - gpu_memory_}, - device{device_}, scheduler{scheduler_}, query_pools{ - QueryPool{device_, scheduler_, - QueryType::SamplesPassed}, - } {} + : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_}, + query_pools{ + QueryPool{device_, scheduler_, QueryType::SamplesPassed}, + } {} 
VKQueryCache::~VKQueryCache() { // TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class @@ -97,8 +95,8 @@ void VKQueryCache::Reserve(QueryType type, std::pair query) { HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr dependency_, QueryType type_) - : HostCounterBase{std::move(dependency_)}, cache{cache_}, - type{type_}, query{cache_.AllocateQuery(type_)}, tick{cache_.Scheduler().CurrentTick()} { + : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, + query{cache_.AllocateQuery(type_)}, tick{cache_.Scheduler().CurrentTick()} { const vk::Device* logical = &cache_.Device().GetLogical(); cache_.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { logical->ResetQueryPoolEXT(query.first, query.second, 1); @@ -119,18 +117,20 @@ u64 HostCounter::BlockingQuery() const { if (tick >= cache.Scheduler().CurrentTick()) { cache.Scheduler().Flush(); } + u64 data; - const VkResult result = cache.Device().GetLogical().GetQueryResults( + const VkResult query_result = cache.Device().GetLogical().GetQueryResults( query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); - switch (result) { + + switch (query_result) { case VK_SUCCESS: return data; case VK_ERROR_DEVICE_LOST: cache.Device().ReportLoss(); [[fallthrough]]; default: - throw vk::Exception(result); + throw vk::Exception(query_result); } } diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index 201fca888..837fe9ebf 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -95,8 +95,8 @@ private: class CachedQuery : public VideoCommon::CachedQueryBase { public: - explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr) - : VideoCommon::CachedQueryBase{cpu_addr, host_ptr} {} + explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, 
VAddr cpu_addr_, u8* host_ptr_) + : CachedQueryBase{cpu_addr_, host_ptr_} {} }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 560386081..f93986aab 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -128,12 +128,12 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry const u32 offset_2 = entry.secondary_offset; const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1); const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2); - return engine.GetTextureInfo(handle_1 | handle_2); + return engine.GetTextureInfo(Tegra::Texture::TextureHandle{handle_1 | handle_2}); } } if (entry.is_bindless) { const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); - return engine.GetTextureInfo(tex_handle); + return engine.GetTextureInfo(Tegra::Texture::TextureHandle{tex_handle}); } const auto& gpu_profile = engine.AccessGuestDriverProfile(); const u32 entry_offset = static_cast(index * gpu_profile.GetTextureHandlerSize()); @@ -380,12 +380,12 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { } } -RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_, +RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MemoryManager& gpu_memory_, - Core::Memory::Memory& cpu_memory, VKScreenInfo& screen_info_, + Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_, const VKDevice& device_, VKMemoryManager& memory_manager_, StateTracker& state_tracker_, VKScheduler& scheduler_) - : RasterizerAccelerated(cpu_memory), gpu(gpu_), gpu_memory(gpu_memory_), + : RasterizerAccelerated(cpu_memory_), gpu(gpu_), gpu_memory(gpu_memory_), maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), 
screen_info(screen_info_), device(device_), memory_manager(memory_manager_), state_tracker(state_tracker_), scheduler(scheduler_), staging_pool(device, memory_manager, scheduler), @@ -397,11 +397,11 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra: texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool), pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, renderpass_cache), - buffer_cache(*this, gpu_memory, cpu_memory, device, memory_manager, scheduler, staging_pool), + buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, staging_pool), sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler), fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, scheduler), - wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window) { + wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { scheduler.SetQueryCache(query_cache); if (device.UseAsynchronousShaders()) { async_shaders.AllocateWorkers(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 1789fb285..30ec58eb4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -105,11 +105,11 @@ struct ImageView { class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { public: - explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, - Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, - VKScreenInfo& screen_info, const VKDevice& device, - VKMemoryManager& memory_manager, StateTracker& state_tracker, - VKScheduler& scheduler); + explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, + Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, + 
VKScreenInfo& screen_info_, const VKDevice& device_, + VKMemoryManager& memory_manager_, StateTracker& state_tracker_, + VKScheduler& scheduler_); ~RasterizerVulkan() override; void Draw(bool is_indexed, bool is_instanced) override; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 64649699f..1ff109880 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -489,12 +489,12 @@ VkImageView CachedSurfaceView::GetAttachment() { return *render_target; } -VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::MemoryManager& gpu_memory, const VKDevice& device_, +VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, VKMemoryManager& memory_manager_, VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_) - : TextureCache(rasterizer, maxwell3d, gpu_memory, device_.IsOptimalAstcSupported()), + : TextureCache(rasterizer_, maxwell3d_, gpu_memory_, device_.IsOptimalAstcSupported()), device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ staging_pool_} {} diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 06880f228..1c632bd2c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -193,10 +193,11 @@ private: class VKTextureCache final : public TextureCacheBase { public: - explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, - const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool); + explicit 
VKTextureCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, + VKMemoryManager& memory_manager_, VKScheduler& scheduler_, + VKStagingBufferPool& staging_pool_); ~VKTextureCache(); private: diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp index 3f96d9076..cc2dbe36c 100644 --- a/src/video_core/shader/ast.cpp +++ b/src/video_core/shader/ast.cpp @@ -374,8 +374,8 @@ std::string ASTManager::Print() const { return printer.GetResult(); } -ASTManager::ASTManager(bool full_decompile, bool disable_else_derivation) - : full_decompile{full_decompile}, disable_else_derivation{disable_else_derivation} {}; +ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_) + : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {} ASTManager::~ASTManager() { Clear(); diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h index 8e5a22ab3..dc49b369e 100644 --- a/src/video_core/shader/ast.h +++ b/src/video_core/shader/ast.h @@ -76,7 +76,7 @@ public: class ASTIfThen { public: - explicit ASTIfThen(Expr condition) : condition{std::move(condition)} {} + explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {} Expr condition; ASTZipper nodes{}; }; @@ -88,63 +88,68 @@ public: class ASTBlockEncoded { public: - explicit ASTBlockEncoded(u32 start, u32 end) : start{start}, end{end} {} + explicit ASTBlockEncoded(u32 start_, u32 _) : start{start_}, end{_} {} u32 start; u32 end; }; class ASTBlockDecoded { public: - explicit ASTBlockDecoded(NodeBlock&& new_nodes) : nodes(std::move(new_nodes)) {} + explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {} NodeBlock nodes; }; class ASTVarSet { public: - explicit ASTVarSet(u32 index, Expr condition) : index{index}, condition{std::move(condition)} {} + explicit ASTVarSet(u32 index_, Expr condition_) + : index{index_}, 
condition{std::move(condition_)} {} + u32 index; Expr condition; }; class ASTLabel { public: - explicit ASTLabel(u32 index) : index{index} {} + explicit ASTLabel(u32 index_) : index{index_} {} u32 index; bool unused{}; }; class ASTGoto { public: - explicit ASTGoto(Expr condition, u32 label) : condition{std::move(condition)}, label{label} {} + explicit ASTGoto(Expr condition_, u32 label_) + : condition{std::move(condition_)}, label{label_} {} + Expr condition; u32 label; }; class ASTDoWhile { public: - explicit ASTDoWhile(Expr condition) : condition{std::move(condition)} {} + explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {} Expr condition; ASTZipper nodes{}; }; class ASTReturn { public: - explicit ASTReturn(Expr condition, bool kills) - : condition{std::move(condition)}, kills{kills} {} + explicit ASTReturn(Expr condition_, bool kills_) + : condition{std::move(condition_)}, kills{kills_} {} + Expr condition; bool kills; }; class ASTBreak { public: - explicit ASTBreak(Expr condition) : condition{std::move(condition)} {} + explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {} Expr condition; }; class ASTBase { public: - explicit ASTBase(ASTNode parent, ASTData data) - : data{std::move(data)}, parent{std::move(parent)} {} + explicit ASTBase(ASTNode parent_, ASTData data_) + : data{std::move(data_)}, parent{std::move(parent_)} {} template static ASTNode Make(ASTNode parent, Args&&... 
args) { @@ -300,7 +305,7 @@ private: class ASTManager final { public: - ASTManager(bool full_decompile, bool disable_else_derivation); + explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_); ~ASTManager(); ASTManager(const ASTManager& o) = delete; diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index 6920afdf2..78245473c 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -13,7 +13,7 @@ namespace VideoCommon::Shader { -AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {} +AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {} AsyncShaders::~AsyncShaders() { KillWorkers(); diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 7a99e1dc5..5a7216019 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h @@ -66,7 +66,7 @@ public: Tegra::Engines::ShaderType shader_type; }; - explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window); + explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_); ~AsyncShaders(); /// Start up shader worker threads diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index d656e0668..9120bf705 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -66,8 +66,8 @@ struct BlockInfo { }; struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry) - : program_code{program_code}, registry{registry}, start{start} {} + explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_) + : program_code{program_code_}, registry{registry_}, start{start_} {} const ProgramCode& program_code; Registry& registry; diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 
62a3510d8..37bf96492 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -42,10 +42,10 @@ struct Condition { class SingleBranch { public: SingleBranch() = default; - SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk, - bool ignore) - : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk}, - ignore{ignore} {} + explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_, + bool is_brk_, bool ignore_) + : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_}, + ignore{ignore_} {} bool operator==(const SingleBranch& b) const { return std::tie(condition, address, kill, is_sync, is_brk, ignore) == @@ -65,15 +65,15 @@ public: }; struct CaseBranch { - CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {} + explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {} u32 cmp_value; u32 address; }; class MultiBranch { public: - MultiBranch(u32 gpr, std::vector&& branches) - : gpr{gpr}, branches{std::move(branches)} {} + explicit MultiBranch(u32 gpr_, std::vector&& branches_) + : gpr{gpr_}, branches{std::move(branches_)} {} u32 gpr{}; std::vector branches{}; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index eeac328a6..c8f4da6df 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -66,7 +66,7 @@ std::optional TryDeduceSamplerSize(const Sampler& sampler_to_deduce, class ASTDecoder { public: - ASTDecoder(ShaderIR& ir) : ir(ir) {} + explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} void operator()(ASTProgram& ast) { ASTNode current = ast.nodes.GetFirst(); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 73155966f..f32c3134b 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ 
b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -258,7 +258,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case OpCode::Id::LEA_IMM: case OpCode::Id::LEA_RZ: case OpCode::Id::LEA_HI: { - auto [op_a, op_b, op_c] = [&]() -> std::tuple { + auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple { switch (opcode->get().GetId()) { case OpCode::Id::LEA_R2: { return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), @@ -294,8 +294,9 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast(Pred::UnusedIndex), "Unhandled LEA Predicate"); - Node value = Operation(OperationCode::ILogicalShiftLeft, std::move(op_a), std::move(op_c)); - value = Operation(OperationCode::IAdd, std::move(op_b), std::move(value)); + Node value = + Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_)); + value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value)); SetRegister(bb, instr.gpr0, std::move(value)); break; diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h index 4e8264367..cda284c72 100644 --- a/src/video_core/shader/expr.h +++ b/src/video_core/shader/expr.h @@ -76,7 +76,7 @@ public: class ExprPredicate final { public: - explicit ExprPredicate(u32 predicate) : predicate{predicate} {} + explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {} bool operator==(const ExprPredicate& b) const { return predicate == b.predicate; @@ -91,7 +91,7 @@ public: class ExprCondCode final { public: - explicit ExprCondCode(ConditionCode cc) : cc{cc} {} + explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {} bool operator==(const ExprCondCode& b) const { return cc == b.cc; @@ -121,7 +121,7 @@ public: class ExprGprEqual final { public: - ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {} + explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {} bool operator==(const ExprGprEqual& b) const { return gpr == 
b.gpr && value == b.value; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index a1e2c4d8e..8db9e1de7 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -290,18 +290,18 @@ struct Sampler { is_buffer{is_buffer_}, is_indexed{is_indexed_} {} /// Separate sampler constructor - constexpr explicit Sampler(u32 index_, std::pair offsets, std::pair buffers, - Tegra::Shader::TextureType type, bool is_array_, bool is_shadow_, - bool is_buffer_) - : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, - buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array_}, - is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} + constexpr explicit Sampler(u32 index_, std::pair offsets_, + std::pair buffers_, Tegra::Shader::TextureType type_, + bool is_array_, bool is_shadow_, bool is_buffer_) + : index{index_}, offset{offsets_.first}, secondary_offset{offsets_.second}, + buffer{buffers_.first}, secondary_buffer{buffers_.second}, type{type_}, + is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} /// Bindless samplers constructor constexpr explicit Sampler(u32 index_, u32 offset_, u32 buffer_, - Tegra::Shader::TextureType type, bool is_array_, bool is_shadow_, + Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type}, is_array{is_array_}, + : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { } diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 29d794b34..879088a27 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -25,9 +25,10 @@ using Tegra::Shader::PredCondition; using Tegra::Shader::PredOperation; using 
Tegra::Shader::Register; -ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, - Registry& registry) - : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} { +ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, + Registry& registry_) + : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{ + registry_} { Decode(); PostDecode(); } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 3a98b2104..6aae14e34 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -29,8 +29,8 @@ struct ShaderBlock; constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; struct ConstBuffer { - constexpr explicit ConstBuffer(u32 max_offset, bool is_indirect) - : max_offset{max_offset}, is_indirect{is_indirect} {} + constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) + : max_offset{max_offset_}, is_indirect{is_indirect_} {} constexpr ConstBuffer() = default; @@ -66,8 +66,8 @@ struct GlobalMemoryUsage { class ShaderIR final { public: - explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, - Registry& registry); + explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, + CompilerSettings settings_, Registry& registry_); ~ShaderIR(); const std::map& GetBasicBlocks() const { diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h index 9c21a0649..5b475fe06 100644 --- a/src/video_core/texture_cache/copy_params.h +++ b/src/video_core/texture_cache/copy_params.h @@ -9,16 +9,16 @@ namespace VideoCommon { struct CopyParams { - constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, - u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height, - u32 depth) - : source_x{source_x}, source_y{source_y}, source_z{source_z}, 
dest_x{dest_x}, - dest_y{dest_y}, dest_z{dest_z}, source_level{source_level}, - dest_level{dest_level}, width{width}, height{height}, depth{depth} {} + constexpr CopyParams(u32 source_x_, u32 source_y_, u32 source_z_, u32 dest_x_, u32 dest_y_, + u32 dest_z_, u32 source_level_, u32 dest_level_, u32 width_, u32 height_, + u32 depth_) + : source_x{source_x_}, source_y{source_y_}, source_z{source_z_}, dest_x{dest_x_}, + dest_y{dest_y_}, dest_z{dest_z_}, source_level{source_level_}, + dest_level{dest_level_}, width{width_}, height{height_}, depth{depth_} {} - constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level) - : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level}, - dest_level{level}, width{width}, height{height}, depth{depth} {} + constexpr CopyParams(u32 width_, u32 height_, u32 depth_, u32 level_) + : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level_}, + dest_level{level_}, width{width_}, height{height_}, depth{depth_} {} u32 source_x; u32 source_y; diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 7d5a75648..7938d71eb 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -24,12 +24,12 @@ constexpr bool C = false; // Normal color constexpr bool S = true; // Srgb struct Table { - constexpr Table(TextureFormat texture_format, bool is_srgb, ComponentType red_component, - ComponentType green_component, ComponentType blue_component, - ComponentType alpha_component, PixelFormat pixel_format) - : texture_format{texture_format}, pixel_format{pixel_format}, red_component{red_component}, - green_component{green_component}, blue_component{blue_component}, - alpha_component{alpha_component}, is_srgb{is_srgb} {} + constexpr Table(TextureFormat texture_format_, bool is_srgb_, ComponentType red_component_, + ComponentType green_component_, 
ComponentType blue_component_, + ComponentType alpha_component_, PixelFormat pixel_format_) + : texture_format{texture_format_}, pixel_format{pixel_format_}, + red_component{red_component_}, green_component{green_component_}, + blue_component{blue_component_}, alpha_component{alpha_component_}, is_srgb{is_srgb_} {} TextureFormat texture_format; PixelFormat pixel_format; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 42a1c0c6f..efbcf6723 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -25,11 +25,11 @@ StagingCache::StagingCache() = default; StagingCache::~StagingCache() = default; -SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params, - bool is_astc_supported) - : params{params}, gpu_addr{gpu_addr}, mipmap_sizes(params.num_levels), +SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_, + bool is_astc_supported_) + : params{params_}, gpu_addr{gpu_addr_}, mipmap_sizes(params_.num_levels), mipmap_offsets(params.num_levels) { - is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported; + is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported_; host_memory_size = params.GetHostSizeInBytes(is_converted); std::size_t offset = 0; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index cfcfa5b3a..b57135fe4 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -148,8 +148,8 @@ public: } protected: - explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params, - bool is_astc_supported); + explicit SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_, + bool is_astc_supported_); ~SurfaceBaseImpl() = default; virtual void DecorateSurfaceName() = 0; @@ -297,9 +297,9 @@ public: } protected: - explicit SurfaceBase(const GPUVAddr 
gpu_addr, const SurfaceParams& params, - bool is_astc_supported) - : SurfaceBaseImpl(gpu_addr, params, is_astc_supported) {} + explicit SurfaceBase(const GPUVAddr gpu_addr_, const SurfaceParams& params_, + bool is_astc_supported_) + : SurfaceBaseImpl{gpu_addr_, params_, is_astc_supported_} {} ~SurfaceBase() = default; diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index 90a8bb0ae..199f72732 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -13,10 +13,10 @@ namespace VideoCommon { struct ViewParams { - constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, - u32 num_layers, u32 base_level, u32 num_levels) - : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level}, - num_levels{num_levels} {} + constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target_, u32 base_layer_, + u32 num_layers_, u32 base_level_, u32 num_levels_) + : target{target_}, base_layer{base_layer_}, num_layers{num_layers_}, + base_level{base_level_}, num_levels{num_levels_} {} std::size_t Hash() const; @@ -44,7 +44,7 @@ struct ViewParams { class ViewBase { public: - constexpr explicit ViewBase(const ViewParams& params) : params{params} {} + constexpr explicit ViewBase(const ViewParams& view_params) : params{view_params} {} constexpr const ViewParams& GetViewParams() const { return params; diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 0574fef12..bbc7e3eaf 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -146,7 +146,7 @@ enum class MsaaMode : u32 { }; union TextureHandle { - TextureHandle(u32 raw) : raw{raw} {} + /* implicit */ TextureHandle(u32 raw_) : raw{raw_} {} u32 raw; BitField<0, 20, u32> tic_id; -- cgit v1.2.3 From a6e6cd5788dac6dc26101f4162d7704ca5c274e8 Mon Sep 17 00:00:00 2001 From: comex Date: Mon, 31 Aug 2020 
10:22:03 -0400 Subject: maxwell_dma: Rename RenderEnable::Mode::FALSE and TRUE to avoid name conflict On Apple platforms, FALSE and TRUE are defined as macros by <mach/boolean.h>, which is included by various system headers. Note that there appear to be no actual users of the names to fix up. --- src/video_core/engines/maxwell_dma.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 17bd280c4..3c59eeb13 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -72,11 +72,13 @@ public: struct RenderEnable { enum class Mode : u32 { - FALSE = 0, - TRUE = 1, - CONDITIONAL = 2, - RENDER_IF_EQUAL = 3, - RENDER_IF_NOT_EQUAL = 4, + // Note: This uses Pascal case in order to avoid the identifiers + // FALSE and TRUE, which are reserved on Darwin. + False = 0, + True = 1, + Conditional = 2, + RenderIfEqual = 3, + RenderIfNotEqual = 4, }; PackedGPUVAddr address; -- cgit v1.2.3 From d637114c170034aab8d2e8b4fa60ae87307dd9c5 Mon Sep 17 00:00:00 2001 From: comex Date: Sat, 14 Nov 2020 19:01:33 -0500 Subject: video_core: Adjust `NUM` macro to avoid Clang warning The previous definition was: #define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32)) In cases where `field_name` happens to refer to an array, Clang thinks `sizeof(an array value) / sizeof(a type)` is an instance of the idiom where `sizeof` is used to compute an array length. So it thinks the type in the denominator ought to be the array element type, and warns if it isn't, assuming this is a mistake. In reality, `NUM` is not used to get array lengths at all, so there is no mistake. Silence the warning by applying Clang's suggested workaround of parenthesizing the denominator.
--- src/video_core/dirty_flags.cpp | 2 +- src/video_core/renderer_opengl/gl_state_tracker.cpp | 2 +- src/video_core/renderer_vulkan/vk_state_tracker.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index e16075993..2faa6ef0e 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp @@ -9,7 +9,7 @@ #include "video_core/dirty_flags.h" #define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) -#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / sizeof(u32)) +#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32))) namespace VideoCommon::Dirty { diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 6bcf831f2..45f4fc565 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -13,7 +13,7 @@ #include "video_core/renderer_opengl/gl_state_tracker.h" #define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) -#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32)) +#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32))) namespace OpenGL { diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 5d2c4a796..50164cc08 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -14,7 +14,7 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" #define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) -#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32)) +#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32))) namespace Vulkan { -- cgit v1.2.3 From b8fbf6969c33a72c7facfd23e7a850e718afac43 Mon Sep 17 00:00:00 2001 From: 
comex Date: Sat, 14 Nov 2020 18:33:40 -0500 Subject: map_interval: Change field order to address uninitialized field warning Clang complains about `new_chunk`'s constructor using the then-uninitialized `first_chunk` (even though it's just to get a pointer into it). --- src/video_core/buffer_cache/map_interval.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index fe0bcd1d8..ef974b08a 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h @@ -84,9 +84,10 @@ private: void FillFreeList(Chunk& chunk); std::vector free_list; - std::unique_ptr* new_chunk = &first_chunk.next; Chunk first_chunk; + + std::unique_ptr* new_chunk = &first_chunk.next; }; } // namespace VideoCommon -- cgit v1.2.3 From eea5122d1b106c667b38741ad2bb16bcd54bbafd Mon Sep 17 00:00:00 2001 From: comex Date: Sun, 15 Nov 2020 13:20:17 -0500 Subject: renderer_vulkan: Add missing `override` specifier --- src/video_core/renderer_vulkan/vk_texture_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 06880f228..bc5048a62 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -82,7 +82,7 @@ public: } protected: - void DecorateSurfaceName(); + void DecorateSurfaceName() override; View CreateView(const ViewParams& view_params) override; -- cgit v1.2.3 From bcc5c4403ab8a42adcf65f98f70a4f6d0ca94a02 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 6 Dec 2020 20:46:53 -0500 Subject: maxwell_3d: Remove unused dirty_pointer array This is unused and removing it shrinks the structure by 3584 bytes. 
--- src/video_core/engines/maxwell_3d.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index b0d9559d0..d9b53cb67 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1504,8 +1504,6 @@ private: bool execute_on{true}; - std::array dirty_pointers{}; - /// Retrieves information about a specific TIC entry from the TIC buffer. Texture::TICEntry GetTICEntry(u32 tic_index) const; -- cgit v1.2.3 From ce0712bf95ec7e261f42108214ebf3f27cd32589 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 6 Dec 2020 20:48:10 -0500 Subject: maxwell_3d: Resolve -Wdocumentation warning Removes a documentation comment for a non-existent member. --- src/video_core/engines/maxwell_3d.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d9b53cb67..71afa3ed3 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1512,8 +1512,8 @@ private: /** * Call a macro on this engine. + * * @param method Method to call - * @param num_parameters Number of arguments * @param parameters Arguments to the method call */ void CallMacroMethod(u32 method, const std::vector& parameters); -- cgit v1.2.3 From 9e7a1f13516eb0bd5447e5758a2f7227a57f9bd9 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 6 Dec 2020 20:51:03 -0500 Subject: maxwell_3d: Move member variables to end of class Follows our established coding style. 
--- src/video_core/engines/maxwell_3d.h | 63 +++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 31 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 71afa3ed3..564acbc53 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1473,37 +1473,6 @@ private: void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call); - Core::System& system; - MemoryManager& memory_manager; - - VideoCore::RasterizerInterface* rasterizer = nullptr; - - /// Start offsets of each macro in macro_memory - std::array macro_positions = {}; - - std::array mme_inline{}; - - /// Macro method that is currently being executed / being fed parameters. - u32 executing_macro = 0; - /// Parameters that have been submitted to the macro call so far. - std::vector macro_params; - - /// Interpreter for the macro codes uploaded to the GPU. - std::unique_ptr macro_engine; - - static constexpr u32 null_cb_data = 0xFFFFFFFF; - struct { - std::array, 16> buffer; - u32 current{null_cb_data}; - u32 id{null_cb_data}; - u32 start_pos{}; - u32 counter{}; - } cb_data_state; - - Upload::State upload_state; - - bool execute_on{true}; - /// Retrieves information about a specific TIC entry from the TIC buffer. Texture::TICEntry GetTICEntry(u32 tic_index) const; @@ -1562,6 +1531,38 @@ private: /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional GetQueryResult(); + + Core::System& system; + MemoryManager& memory_manager; + + VideoCore::RasterizerInterface* rasterizer = nullptr; + + /// Start offsets of each macro in macro_memory + std::array macro_positions{}; + + std::array mme_inline{}; + + /// Macro method that is currently being executed / being fed parameters. + u32 executing_macro = 0; + /// Parameters that have been submitted to the macro call so far. 
+ std::vector macro_params; + + /// Interpreter for the macro codes uploaded to the GPU. + std::unique_ptr macro_engine; + + static constexpr u32 null_cb_data = 0xFFFFFFFF; + struct CBDataState { + std::array, 16> buffer; + u32 current{null_cb_data}; + u32 id{null_cb_data}; + u32 start_pos{}; + u32 counter{}; + }; + CBDataState cb_data_state; + + Upload::State upload_state; + + bool execute_on{true}; }; #define ASSERT_REG_POSITION(field_name, position) \ -- cgit v1.2.3 From 4c5f5c9bf301d3626df104dbed6fed6f115cedc8 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 7 Dec 2020 00:41:47 -0500 Subject: video_core: Remove unnecessary enum class casting in logging messages fmt now automatically prints the numeric value of an enum class member by default, so we don't need to use casts any more. Reduces the line noise a bit. --- src/video_core/command_classes/codecs/codec.cpp | 4 +-- src/video_core/command_classes/vic.cpp | 2 +- src/video_core/engines/fermi_2d.cpp | 3 +- src/video_core/engines/maxwell_3d.cpp | 11 +++---- src/video_core/engines/shader_bytecode.h | 6 ++-- src/video_core/gpu.cpp | 8 ++--- src/video_core/macro/macro_interpreter.cpp | 7 ++-- src/video_core/macro/macro_jit_x64.cpp | 5 ++- .../renderer_opengl/gl_arb_decompiler.cpp | 8 ++--- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_shader_decompiler.cpp | 10 +++--- .../renderer_opengl/gl_texture_cache.cpp | 3 +- src/video_core/renderer_opengl/maxwell_to_gl.h | 30 +++++++++--------- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 +- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 37 ++++++++++------------ .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- .../renderer_vulkan/vk_shader_decompiler.cpp | 8 ++--- .../renderer_vulkan/vk_texture_cache.cpp | 4 +-- src/video_core/shader/decode/arithmetic.cpp | 3 +- .../shader/decode/arithmetic_integer.cpp | 2 +- .../shader/decode/arithmetic_integer_immediate.cpp | 5 ++- src/video_core/shader/decode/conversion.cpp | 4 +-- 
src/video_core/shader/decode/memory.cpp | 25 +++++++-------- src/video_core/shader/decode/other.cpp | 34 ++++++++------------ src/video_core/shader/decode/shift.cpp | 2 +- src/video_core/shader/decode/texture.cpp | 9 +++--- src/video_core/shader/decode/warp.cpp | 2 +- src/video_core/shader/node_helper.cpp | 2 +- src/video_core/shader/shader_ir.cpp | 10 +++--- src/video_core/surface.cpp | 12 +++---- src/video_core/texture_cache/surface_params.cpp | 4 +-- src/video_core/texture_cache/texture_cache.h | 5 ++- src/video_core/textures/convert.cpp | 2 +- 33 files changed, 125 insertions(+), 148 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index f547f5bd4..39bc923a5 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -44,7 +44,7 @@ Codec::~Codec() { } void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { - LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast(codec)); + LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", codec); current_codec = codec; } @@ -62,7 +62,7 @@ void Codec::Decode() { } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); } else { - LOG_ERROR(Service_NVDRV, "Unknown video codec {}", static_cast(current_codec)); + LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec); return; } diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 6cfc193fa..66e21ce9c 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -27,7 +27,7 @@ void Vic::VicStateWrite(u32 offset, u32 arguments) { } void Vic::ProcessMethod(Method method, const std::vector& arguments) { - LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast(method)); + LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", method); 
VicStateWrite(static_cast(method), arguments[0]); const u64 arg = static_cast(arguments[0]) << 8; switch (method) { diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 9409c4075..4293d676c 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -48,8 +48,7 @@ static std::pair DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_ } void Fermi2D::HandleSurfaceCopy() { - LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", - static_cast(regs.operation)); + LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation); // TODO(Subv): Only raw copies are implemented. ASSERT(regs.operation == Operation::SrcCopy); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 6287df633..761962ed0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -359,7 +359,7 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) { } void Maxwell3D::FlushMMEInlineDraw() { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast(regs.draw.topology.Value()), + LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); @@ -504,8 +504,7 @@ void Maxwell3D::ProcessCounterReset() { rasterizer->ResetCounter(QueryType::SamplesPassed); break; default: - LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", - static_cast(regs.counter_reset)); + LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.counter_reset); break; } } @@ -520,7 +519,7 @@ void Maxwell3D::ProcessSyncPoint() { } void Maxwell3D::DrawArrays() { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast(regs.draw.topology.Value()), + LOG_TRACE(HW_GPU, "called, topology={}, count={}", 
regs.draw.topology.Value(), regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); @@ -558,12 +557,12 @@ std::optional Maxwell3D::GetQueryResult() { return 0; case Regs::QuerySelect::SamplesPassed: // Deferred. - rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, + rasterizer->Query(regs.query.QueryAddress(), QueryType::SamplesPassed, system.GPU().GetTicks()); return std::nullopt; default: LOG_DEBUG(HW_GPU, "Unimplemented query select type {}", - static_cast(regs.query.query_get.select.Value())); + regs.query.query_get.select.Value()); return 1; } } diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 37d17efdc..8b45f1b62 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1437,8 +1437,7 @@ union Instruction { return TextureType::TextureCube; } - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", - static_cast(texture_info.Value())); + LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); UNREACHABLE(); return TextureType::Texture1D; } @@ -1533,8 +1532,7 @@ union Instruction { return TextureType::Texture3D; } - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", - static_cast(texture_info.Value())); + LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); UNREACHABLE(); return TextureType::Texture1D; } diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 964b3f3dc..e2512a7f2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -299,8 +299,7 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { break; } default: - LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", - static_cast(method)); + LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method); break; } } @@ -379,7 +378,7 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) { 
dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel); break; default: - UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", static_cast(engine_id)); + UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); } } @@ -392,8 +391,7 @@ void GPU::ProcessFenceActionMethod() { IncrementSyncPoint(regs.fence_action.syncpoint_id); break; default: - UNIMPLEMENTED_MSG("Unimplemented operation {}", - static_cast(regs.fence_action.op.Value())); + UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value()); } } diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index 44a71aa6c..8da26fd59 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -133,8 +133,7 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) { break; } default: - UNIMPLEMENTED_MSG("Unimplemented macro operation {}", - static_cast(opcode.operation.Value())); + UNIMPLEMENTED_MSG("Unimplemented macro operation {}", opcode.operation.Value()); } // An instruction with the Exit flag will not actually @@ -182,7 +181,7 @@ u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, return ~(src_a & src_b); default: - UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", static_cast(operation)); + UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", operation); return 0; } } @@ -230,7 +229,7 @@ void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 r Send((result >> 12) & 0b111111); break; default: - UNIMPLEMENTED_MSG("Unimplemented result operation {}", static_cast(operation)); + UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation); } } diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index c82bb987f..c6b2b2109 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -165,8 +165,7 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { } 
break; default: - UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", - static_cast(opcode.alu_operation.Value())); + UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value()); break; } Compile_ProcessResult(opcode.result_operation, opcode.dst); @@ -604,7 +603,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3 Compile_Send(RESULT); break; default: - UNIMPLEMENTED_MSG("Unimplemented macro operation {}", static_cast(operation)); + UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation); } } diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index 5378c398e..78066cc63 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -71,7 +71,7 @@ std::string_view GetInputFlags(PixelImap attribute) { case PixelImap::Unused: break; } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast(attribute)); + UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); return {}; } @@ -123,7 +123,7 @@ std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::Primitive case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: return "TRIANGLES_ADJACENCY"; default: - UNIMPLEMENTED_MSG("topology={}", static_cast(topology)); + UNIMPLEMENTED_MSG("topology={}", topology); return "POINTS"; } } @@ -137,7 +137,7 @@ std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { case Tegra::Shader::OutputTopology::TriangleStrip: return "TRIANGLE_STRIP"; default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast(topology)); + UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); return "points"; } } @@ -1351,7 +1351,7 @@ std::string ARBDecompiler::Visit(const Node& node) { GetGenericAttributeIndex(index), swizzle); } } - UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast(index)); + 
UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index); break; } return "{0, 0, 0, 0}.x"; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8572af5a5..e58e84759 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -131,7 +131,7 @@ std::pair TransformFeedbackEnum(u8 location) { case 43: return {GL_BACK_SECONDARY_COLOR_NV, 0}; } - UNIMPLEMENTED_MSG("index={}", static_cast(index)); + UNIMPLEMENTED_MSG("index={}", index); return {GL_POSITION, 0}; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0940969ba..0c97a8988 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -316,7 +316,7 @@ std::pair GetPrimitiveDescription(Maxwell::PrimitiveTopology t case Maxwell::PrimitiveTopology::TriangleStripAdjacency: return {"triangles_adjacency", 6}; default: - UNIMPLEMENTED_MSG("topology={}", static_cast(topology)); + UNIMPLEMENTED_MSG("topology={}", topology); return {"points", 1}; } } @@ -342,7 +342,7 @@ std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { case Tegra::Shader::OutputTopology::TriangleStrip: return "triangle_strip"; default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast(topology)); + UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); return "points"; } } @@ -745,7 +745,7 @@ private: case PixelImap::Unused: break; } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast(attribute)); + UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); return {}; } @@ -1252,7 +1252,7 @@ private: } break; } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast(attribute)); + UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); return {"0", Type::Int}; } @@ -1332,7 +1332,7 @@ private: 
GetSwizzle(element)), Type::Float}}; } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast(attribute)); + UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute); return std::nullopt; } } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index f19ef2173..daf352b50 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -689,8 +689,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) dest_format.format, dest_format.type, nullptr); break; default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", - static_cast(dst_params.target)); + LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", dst_params.target); UNREACHABLE(); } } diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index a8be2aa37..dd4ee3361 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -107,7 +107,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { case Maxwell::IndexFormat::UnsignedInt: return GL_UNSIGNED_INT; } - UNREACHABLE_MSG("Invalid index_format={}", static_cast(index_format)); + UNREACHABLE_MSG("Invalid index_format={}", index_format); return {}; } @@ -144,7 +144,7 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { case Maxwell::PrimitiveTopology::Patches: return GL_PATCHES; } - UNREACHABLE_MSG("Invalid topology={}", static_cast(topology)); + UNREACHABLE_MSG("Invalid topology={}", topology); return GL_POINTS; } @@ -172,8 +172,8 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, } break; } - UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}", - static_cast(filter_mode), static_cast(mipmap_filter_mode)); + UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter 
mode={}", filter_mode, + mipmap_filter_mode); return GL_NEAREST; } @@ -204,7 +204,7 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { return GL_MIRROR_CLAMP_TO_EDGE; } } - UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast(wrap_mode)); + UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", wrap_mode); return GL_REPEAT; } @@ -227,7 +227,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) { case Tegra::Texture::DepthCompareFunc::Always: return GL_ALWAYS; } - UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast(func)); + UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", func); return GL_GREATER; } @@ -249,7 +249,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) { case Maxwell::Blend::Equation::MaxGL: return GL_MAX; } - UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast(equation)); + UNIMPLEMENTED_MSG("Unimplemented blend equation={}", equation); return GL_FUNC_ADD; } @@ -313,7 +313,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) { case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: return GL_ONE_MINUS_CONSTANT_ALPHA; } - UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast(factor)); + UNIMPLEMENTED_MSG("Unimplemented blend factor={}", factor); return GL_ZERO; } @@ -333,7 +333,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) { case Tegra::Texture::SwizzleSource::OneFloat: return GL_ONE; } - UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast(source)); + UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", source); return GL_ZERO; } @@ -364,7 +364,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { case Maxwell::ComparisonOp::AlwaysOld: return GL_ALWAYS; } - UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast(comparison)); + UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); return GL_ALWAYS; } @@ -395,7 +395,7 @@ inline GLenum 
StencilOp(Maxwell::StencilOp stencil) { case Maxwell::StencilOp::DecrWrapOGL: return GL_DECR_WRAP; } - UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast(stencil)); + UNIMPLEMENTED_MSG("Unimplemented stencil op={}", stencil); return GL_KEEP; } @@ -406,7 +406,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) { case Maxwell::FrontFace::CounterClockWise: return GL_CCW; } - UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast(front_face)); + UNIMPLEMENTED_MSG("Unimplemented front face cull={}", front_face); return GL_CCW; } @@ -419,7 +419,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) { case Maxwell::CullFace::FrontAndBack: return GL_FRONT_AND_BACK; } - UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast(cull_face)); + UNIMPLEMENTED_MSG("Unimplemented cull face={}", cull_face); return GL_BACK; } @@ -458,7 +458,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) { case Maxwell::LogicOperation::Set: return GL_SET; } - UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast(operation)); + UNIMPLEMENTED_MSG("Unimplemented logic operation={}", operation); return GL_COPY; } @@ -471,7 +471,7 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) { case Maxwell::PolygonMode::Fill: return GL_FILL; } - UNREACHABLE_MSG("Invalid polygon mode={}", static_cast(polygon_mode)); + UNREACHABLE_MSG("Invalid polygon mode={}", polygon_mode); return GL_FILL; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 521b03ba2..cbfaaa99c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -348,7 +348,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { } else { // Other transformations are unsupported LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}", - static_cast(framebuffer_transform_flags)); + framebuffer_transform_flags); 
UNIMPLEMENTED(); } } diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index d22de1d81..58e117eb3 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -26,7 +26,7 @@ VkFilter Filter(Tegra::Texture::TextureFilter filter) { case Tegra::Texture::TextureFilter::Linear: return VK_FILTER_LINEAR; } - UNREACHABLE_MSG("Invalid sampler filter={}", static_cast(filter)); + UNREACHABLE_MSG("Invalid sampler filter={}", filter); return {}; } @@ -43,7 +43,7 @@ VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter case Tegra::Texture::TextureMipmapFilter::Linear: return VK_SAMPLER_MIPMAP_MODE_LINEAR; } - UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast(mipmap_filter)); + UNREACHABLE_MSG("Invalid sampler mipmap mode={}", mipmap_filter); return {}; } @@ -79,7 +79,7 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w UNIMPLEMENTED(); return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; default: - UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast(wrap_mode)); + UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", wrap_mode); return {}; } } @@ -103,8 +103,7 @@ VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_ case Tegra::Texture::DepthCompareFunc::Always: return VK_COMPARE_OP_ALWAYS; } - UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", - static_cast(depth_compare_func)); + UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", depth_compare_func); return {}; } @@ -228,8 +227,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo auto tuple = tex_format_tuples[static_cast(pixel_format)]; if (tuple.format == VK_FORMAT_UNDEFINED) { - UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", - static_cast(pixel_format)); + UNIMPLEMENTED_MSG("Unimplemented texture format with pixel 
format={}", pixel_format); return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true}; } @@ -275,7 +273,7 @@ VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { case Tegra::Engines::ShaderType::Compute: return VK_SHADER_STAGE_COMPUTE_BIT; } - UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast(stage)); + UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); return {}; } @@ -300,7 +298,7 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, case Maxwell::PrimitiveTopology::Patches: return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; default: - UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast(topology)); + UNIMPLEMENTED_MSG("Unimplemented topology={}", topology); return {}; } } @@ -490,8 +488,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib } break; } - UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast(type), - static_cast(size)); + UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", type, size); return {}; } @@ -522,7 +519,7 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { case Maxwell::ComparisonOp::AlwaysOld: return VK_COMPARE_OP_ALWAYS; } - UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast(comparison)); + UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); return {}; } @@ -539,7 +536,7 @@ VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_forma case Maxwell::IndexFormat::UnsignedInt: return VK_INDEX_TYPE_UINT32; } - UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast(index_format)); + UNIMPLEMENTED_MSG("Unimplemented index_format={}", index_format); return {}; } @@ -570,7 +567,7 @@ VkStencilOp StencilOp(Maxwell::StencilOp stencil_op) { case Maxwell::StencilOp::DecrWrapOGL: return VK_STENCIL_OP_DECREMENT_AND_WRAP; } - UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast(stencil_op)); + UNIMPLEMENTED_MSG("Unimplemented stencil op={}", 
stencil_op); return {}; } @@ -592,7 +589,7 @@ VkBlendOp BlendEquation(Maxwell::Blend::Equation equation) { case Maxwell::Blend::Equation::MaxGL: return VK_BLEND_OP_MAX; } - UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast(equation)); + UNIMPLEMENTED_MSG("Unimplemented blend equation={}", equation); return {}; } @@ -656,7 +653,7 @@ VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor) { case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA; } - UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast(factor)); + UNIMPLEMENTED_MSG("Unimplemented blend factor={}", factor); return {}; } @@ -667,7 +664,7 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face) { case Maxwell::FrontFace::CounterClockWise: return VK_FRONT_FACE_COUNTER_CLOCKWISE; } - UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast(front_face)); + UNIMPLEMENTED_MSG("Unimplemented front face={}", front_face); return {}; } @@ -680,7 +677,7 @@ VkCullModeFlags CullFace(Maxwell::CullFace cull_face) { case Maxwell::CullFace::FrontAndBack: return VK_CULL_MODE_FRONT_AND_BACK; } - UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast(cull_face)); + UNIMPLEMENTED_MSG("Unimplemented cull face={}", cull_face); return {}; } @@ -700,7 +697,7 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { case Tegra::Texture::SwizzleSource::OneFloat: return VK_COMPONENT_SWIZZLE_ONE; } - UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast(swizzle)); + UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", swizzle); return {}; } @@ -723,7 +720,7 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) case Maxwell::ViewportSwizzle::NegativeW: return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV; } - UNREACHABLE_MSG("Invalid swizzle={}", static_cast(swizzle)); + UNREACHABLE_MSG("Invalid swizzle={}", swizzle); return {}; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp 
b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 39e58a56f..3fb264d03 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -75,7 +75,7 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { case Maxwell::ShaderProgram::Fragment: return ShaderType::Fragment; default: - UNIMPLEMENTED_MSG("program={}", static_cast(program)); + UNIMPLEMENTED_MSG("program={}", program); return ShaderType::Vertex; } } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 7b0169acd..5748eab3a 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -114,7 +114,7 @@ spv::Dim GetSamplerDim(const Sampler& sampler) { case Tegra::Shader::TextureType::TextureCube: return spv::Dim::Cube; default: - UNIMPLEMENTED_MSG("Unimplemented sampler type={}", static_cast(sampler.type)); + UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type); return spv::Dim::Dim2D; } } @@ -134,7 +134,7 @@ std::pair GetImageDim(const Image& image) { case Tegra::Shader::ImageType::Texture3D: return {spv::Dim::Dim3D, false}; default: - UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast(image.type)); + UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type); return {spv::Dim::Dim2D, false}; } } @@ -1254,7 +1254,7 @@ private: const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); return {OpLoad(GetTypeDefinition(type), pointer), type}; } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast(attribute)); + UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); return {v_float_zero, Type::Float}; } @@ -1890,7 +1890,7 @@ private: case Tegra::Shader::TextureType::Texture3D: return 3; default: - UNREACHABLE_MSG("Invalid texture type={}", static_cast(type)); + UNREACHABLE_MSG("Invalid texture type={}", type); return 2; } 
}(); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 1ff109880..ae2e3322c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -52,7 +52,7 @@ VkImageType SurfaceTargetToImage(SurfaceTarget target) { UNREACHABLE(); return {}; } - UNREACHABLE_MSG("Unknown texture target={}", static_cast(target)); + UNREACHABLE_MSG("Unknown texture target={}", target); return {}; } @@ -64,7 +64,7 @@ VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; } else { - UNREACHABLE_MSG("Invalid pixel format={}", static_cast(pixel_format)); + UNREACHABLE_MSG("Invalid pixel format={}", pixel_format); return VK_IMAGE_ASPECT_COLOR_BIT; } } diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index afef5948d..15eb700e7 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -110,8 +110,7 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case SubOp::Sqrt: return Operation(OperationCode::FSqrt, PRECISE, op_a); default: - UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", - static_cast(instr.sub_op.Value())); + UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value()); return Immediate(0); } }(); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index f32c3134b..7b5bb7003 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -83,7 +83,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case IAdd3Height::UpperHalfWord: return BitfieldExtract(value, 16, 16); default: - UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", 
static_cast(height)); + UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height); return Immediate(0); } }; diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 2a30aab2b..73580277a 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -72,7 +72,7 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation case LogicOperation::PassB: return op_b; default: - UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast(logic_op)); + UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op); return Immediate(0); } }(); @@ -92,8 +92,7 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation break; } default: - UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", - static_cast(predicate_mode)); + UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode); } } diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index b9989c88c..fea7a54df 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -244,7 +244,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { return Operation(OperationCode::FTrunc, value); default: UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", - static_cast(instr.conversion.f2f.rounding.Value())); + instr.conversion.f2f.rounding.Value()); return value; } }(); @@ -300,7 +300,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { return Operation(OperationCode::FTrunc, PRECISE, value); default: UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", - static_cast(instr.conversion.f2i.rounding.Value())); + instr.conversion.f2i.rounding.Value()); return Immediate(0); } }(); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp 
index e2bba88dd..50f4e7d35 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -47,7 +47,7 @@ OperationCode GetAtomOperation(AtomicOp op) { case AtomicOp::Exch: return OperationCode::AtomicIExchange; default: - UNIMPLEMENTED_MSG("op={}", static_cast(op)); + UNIMPLEMENTED_MSG("op={}", op); return OperationCode::AtomicIAdd; } } @@ -83,7 +83,7 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { case Tegra::Shader::UniformType::UnsignedQuad: return 128; default: - UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast(uniform_type)); + UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type); return 32; } } @@ -175,12 +175,12 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } default: - UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast(instr.ld_c.type.Value())); + UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value()); } break; } case OpCode::Id::LD_L: - LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast(instr.ld_l.unknown)); + LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown); [[fallthrough]]; case OpCode::Id::LD_S: { const auto GetAddress = [&](s32 offset) { @@ -224,7 +224,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } default: UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), - static_cast(instr.ldst_sl.type.Value())); + instr.ldst_sl.type.Value()); } break; } @@ -306,8 +306,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::ST_L: - LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", - static_cast(instr.st_l.cache_management.Value())); + LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value()); [[fallthrough]]; case OpCode::Id::ST_S: { const auto GetAddress = [&](s32 offset) { @@ -340,7 +339,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } default: UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), - 
static_cast(instr.ldst_sl.type.Value())); + instr.ldst_sl.type.Value()); } break; } @@ -387,7 +386,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } case OpCode::Id::RED: { UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", - static_cast(instr.red.type.Value())); + instr.red.type.Value()); const auto [real_address, base_address, descriptor] = TrackGlobalMemory(bb, instr, true, true); if (!real_address || !base_address) { @@ -403,12 +402,12 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || instr.atom.operation == AtomicOp::Dec || instr.atom.operation == AtomicOp::SafeAdd, - "operation={}", static_cast(instr.atom.operation.Value())); + "operation={}", instr.atom.operation.Value()); UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || instr.atom.type == GlobalAtomicType::U64 || instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || instr.atom.type == GlobalAtomicType::F32_FTZ_RN, - "type={}", static_cast(instr.atom.type.Value())); + "type={}", instr.atom.type.Value()); const auto [real_address, base_address, descriptor] = TrackGlobalMemory(bb, instr, true, true); @@ -428,10 +427,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { case OpCode::Id::ATOMS: { UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || instr.atoms.operation == AtomicOp::Dec, - "operation={}", static_cast(instr.atoms.operation.Value())); + "operation={}", instr.atoms.operation.Value()); UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || instr.atoms.type == AtomicType::U64, - "type={}", static_cast(instr.atoms.type.Value())); + "type={}", instr.atoms.type.Value()); const bool is_signed = instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; const s32 offset = instr.atoms.GetImmediateOffset(); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 1db500bc4..d3ea07aac 100644 --- 
a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -34,14 +34,13 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::EXIT: { - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}", - static_cast(cc)); + const ConditionCode cc = instr.flow_condition_code; + UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); switch (instr.flow.cond) { case Tegra::Shader::FlowCondition::Always: bb.push_back(Operation(OperationCode::Exit)); - if (instr.pred.pred_index == static_cast(Tegra::Shader::Pred::UnusedIndex)) { + if (instr.pred.pred_index == static_cast(Pred::UnusedIndex)) { // If this is an unconditional exit then just end processing here, // otherwise we have to account for the possibility of the condition // not being met, so continue processing the next instruction. @@ -56,17 +55,15 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; default: - UNIMPLEMENTED_MSG("Unhandled flow condition: {}", - static_cast(instr.flow.cond.Value())); + UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value()); } break; } case OpCode::Id::KIL: { UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}", - static_cast(cc)); + const ConditionCode cc = instr.flow_condition_code; + UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc); bb.push_back(Operation(OperationCode::Discard)); break; @@ -130,8 +127,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { return Immediate(0u); } default: - UNIMPLEMENTED_MSG("Unhandled system move: {}", - static_cast(instr.sys20.Value())); + UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value()); return Immediate(0u); } 
}(); @@ -181,8 +177,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { } const Node branch = Operation(OperationCode::BranchIndirect, operand); - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - if (cc != Tegra::Shader::ConditionCode::T) { + const ConditionCode cc = instr.flow_condition_code; + if (cc != ConditionCode::T) { bb.push_back(Conditional(GetConditionCode(cc), {branch})); } else { bb.push_back(branch); @@ -218,9 +214,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::SYNC: { - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", - static_cast(cc)); + const ConditionCode cc = instr.flow_condition_code; + UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc); if (decompiled) { break; @@ -231,9 +226,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::BRK: { - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", - static_cast(cc)); + const ConditionCode cc = instr.flow_condition_code; + UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc); if (decompiled) { break; } @@ -306,7 +300,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { case Tegra::Shader::MembarType::GL: return OperationCode::MemoryBarrierGlobal; default: - UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast(instr.membar.type.Value())); + UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value()); return OperationCode::MemoryBarrierGlobal; } }(); diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index d4ffa8014..a53819c15 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -125,7 +125,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { case 
OpCode::Id::SHF_LEFT_IMM: { UNIMPLEMENTED_IF(instr.generates_cc); UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", - static_cast(instr.shf.xmode.Value())); + instr.shf.xmode.Value()); if (instr.is_b_imm) { op_b = Immediate(static_cast(instr.shf.immediate)); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 02fdccd86..fb18f631f 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -34,7 +34,7 @@ static std::size_t GetCoordCount(TextureType texture_type) { case TextureType::TextureCube: return 3; default: - UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast(texture_type)); + UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type); return 0; } } @@ -255,8 +255,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { break; } default: - UNIMPLEMENTED_MSG("Unhandled texture query type: {}", - static_cast(instr.txq.query_type.Value())); + UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value()); } break; } @@ -302,7 +301,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { case TextureType::TextureCube: return 3; default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast(texture_type)); + UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type); return 2; } }(); @@ -595,7 +594,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, lod = GetRegister(instr.gpr20.Value() + bias_offset); break; default: - UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast(process_mode)); + UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode); break; } diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index 11b77f795..37433d783 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -27,7 +27,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) { case VoteOperation::Eq: 
return OperationCode::VoteEqual; default: - UNREACHABLE_MSG("Invalid vote operation={}", static_cast(vote_op)); + UNREACHABLE_MSG("Invalid vote operation={}", vote_op); return OperationCode::VoteAll; } } diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index 7bf4ff387..6a5b6940d 100644 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp @@ -107,7 +107,7 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); return {}; default: - UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast(operation_code)); + UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code); return {}; } } diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 879088a27..e1ab3e6e0 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -171,7 +171,7 @@ Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signe // Default - do nothing return value; default: - UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast(size)); + UNREACHABLE_MSG("Unimplemented conversion size: {}", size); return value; } } @@ -336,15 +336,15 @@ OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { return operation_table[index]; } -Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const { +Node ShaderIR::GetConditionCode(ConditionCode cc) const { switch (cc) { - case Tegra::Shader::ConditionCode::NEU: + case ConditionCode::NEU: return GetInternalFlag(InternalFlag::Zero, true); - case Tegra::Shader::ConditionCode::FCSM_TR: + case ConditionCode::FCSM_TR: UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); return MakeNode(Pred::NeverExecute, false); default: - UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast(cc)); + UNIMPLEMENTED_MSG("Unimplemented condition code: {}", 
cc); return MakeNode(Pred::NeverExecute, false); } } diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 1688267bb..937e29d1e 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -28,7 +28,7 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t case Tegra::Texture::TextureType::Texture2DArray: return SurfaceTarget::Texture2DArray; default: - LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", static_cast(texture_type)); + LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", texture_type); UNREACHABLE(); return SurfaceTarget::Texture2D; } @@ -47,7 +47,7 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) { case SurfaceTarget::TextureCubeArray: return true; default: - LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast(target)); + LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target); UNREACHABLE(); return false; } @@ -66,7 +66,7 @@ bool SurfaceTargetIsArray(SurfaceTarget target) { case SurfaceTarget::TextureCubeArray: return true; default: - LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast(target)); + LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target); UNREACHABLE(); return false; } @@ -85,7 +85,7 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT: return PixelFormat::D32_FLOAT_S8_UINT; default: - UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast(format)); + UNIMPLEMENTED_MSG("Unimplemented format={}", format); return PixelFormat::S8_UINT_D24_UNORM; } } @@ -183,7 +183,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) case Tegra::RenderTargetFormat::R8_UINT: return PixelFormat::R8_UINT; default: - UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast(format)); + UNIMPLEMENTED_MSG("Unimplemented format={}", format); return PixelFormat::A8B8G8R8_UNORM; } } @@ -197,7 +197,7 @@ PixelFormat 
PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM: return PixelFormat::B8G8R8A8_UNORM; default: - UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast(format)); + UNIMPLEMENTED_MSG("Unimplemented format={}", format); return PixelFormat::A8B8G8R8_UNORM; } } diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 305297719..96f93246d 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -398,9 +398,9 @@ std::string SurfaceParams::TargetName() const { case SurfaceTarget::TextureCubeArray: return "CubeArray"; default: - LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast(target)); + LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target); UNREACHABLE(); - return fmt::format("TUK({})", static_cast(target)); + return fmt::format("TUK({})", target); } } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ea835c59f..581d8dd5b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1191,9 +1191,8 @@ private: const SurfaceParams& src_params = src->GetSurfaceParams(); const SurfaceParams& dst_params = dst->GetSurfaceParams(); if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { - LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", - static_cast(dst_params.pixel_format), - static_cast(src_params.pixel_format)); + LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", dst_params.pixel_format, + src_params.pixel_format); return; } ImageCopy(src, dst, copy); diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp index 962921483..bd1aebf02 100644 --- a/src/video_core/textures/convert.cpp +++ b/src/video_core/textures/convert.cpp @@ -82,7 +82,7 @@ void 
ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 h bool convert_astc, bool convert_s8z24) { if (convert_astc && IsPixelFormatASTC(pixel_format)) { LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", - static_cast(pixel_format)); + pixel_format); UNREACHABLE(); } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { -- cgit v1.2.3 From 7234f436aa0482a3e7b6a55121511d4199f87967 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 7 Dec 2020 00:50:12 -0500 Subject: shader_ir: std::move node within DeclareAmend() Same behavior, but elides an unnecessary atomic reference count increment and decrement. --- src/video_core/shader/shader_ir.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 879088a27..5815e68c9 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -452,8 +452,8 @@ void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) { } std::size_t ShaderIR::DeclareAmend(Node new_amend) { - const std::size_t id = amend_code.size(); - amend_code.push_back(new_amend); + const auto id = amend_code.size(); + amend_code.push_back(std::move(new_amend)); return id; } -- cgit v1.2.3 From 3954f14c6d7043804a85f2cbbad1b7e335162276 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 7 Dec 2020 01:52:13 -0500 Subject: buffer_block: Remove unnecessary includes Reduces the amount of dependencies the header pulls in. 
--- src/video_core/buffer_cache/buffer_block.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h index e64170e66..eee6908b1 100644 --- a/src/video_core/buffer_cache/buffer_block.h +++ b/src/video_core/buffer_cache/buffer_block.h @@ -4,12 +4,7 @@ #pragma once -#include -#include - -#include "common/alignment.h" #include "common/common_types.h" -#include "video_core/gpu.h" namespace VideoCommon { -- cgit v1.2.3 From 5d2f18fbcdca61b3bf140e92bf1c7d5b163aa580 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 7 Dec 2020 01:53:37 -0500 Subject: buffer_block: Mark interface as nodiscard where applicable Prevents logic errors from occurring from unused values. --- src/video_core/buffer_cache/buffer_block.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h index eee6908b1..e9306194a 100644 --- a/src/video_core/buffer_cache/buffer_block.h +++ b/src/video_core/buffer_cache/buffer_block.h @@ -10,23 +10,23 @@ namespace VideoCommon { class BufferBlock { public: - bool Overlaps(VAddr start, VAddr end) const { + [[nodiscard]] bool Overlaps(VAddr start, VAddr end) const { return (cpu_addr < end) && (cpu_addr_end > start); } - bool IsInside(VAddr other_start, VAddr other_end) const { + [[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const { return cpu_addr <= other_start && other_end <= cpu_addr_end; } - std::size_t Offset(VAddr in_addr) const { + [[nodiscard]] std::size_t Offset(VAddr in_addr) const { return static_cast(in_addr - cpu_addr); } - VAddr CpuAddr() const { + [[nodiscard]] VAddr CpuAddr() const { return cpu_addr; } - VAddr CpuAddrEnd() const { + [[nodiscard]] VAddr CpuAddrEnd() const { return cpu_addr_end; } @@ -35,11 +35,11 @@ public: cpu_addr_end = new_addr + size; } - std::size_t Size() 
const { + [[nodiscard]] std::size_t Size() const { return size; } - u64 Epoch() const { + [[nodiscard]] u64 Epoch() const { return epoch; } -- cgit v1.2.3 From edcbd478004a175e2e926fe109d70d0a731d56a8 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 7 Dec 2020 14:01:50 -0500 Subject: gl_shader_decompiler: Elide unnecessary copies within DeclareConstantBuffers() Resolves a -Wrange-loop-analysis warning. --- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0c97a8988..ccbdfe967 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -878,7 +878,7 @@ private: } u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto [index, info] : ir.GetConstantBuffers()) { + for (const auto& [index, info] : ir.GetConstantBuffers()) { const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4; const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, -- cgit v1.2.3 From 45c5b084fde190336d07c01368699a6129214bdf Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 7 Dec 2020 16:09:29 -0500 Subject: ast: Improve string concat readability in operator() Provides an in-place format string to make it more pleasant to read. 
--- src/video_core/shader/ast.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp index cc2dbe36c..db11144c7 100644 --- a/src/video_core/shader/ast.cpp +++ b/src/video_core/shader/ast.cpp @@ -212,16 +212,15 @@ public: } void operator()(const ExprPredicate& expr) { - inner += "P" + std::to_string(expr.predicate); + inner += fmt::format("P{}", expr.predicate); } void operator()(const ExprCondCode& expr) { - u32 cc = static_cast(expr.cc); - inner += "CC" + std::to_string(cc); + inner += fmt::format("CC{}", expr.cc); } void operator()(const ExprVar& expr) { - inner += "V" + std::to_string(expr.var_index); + inner += fmt::format("V{}", expr.var_index); } void operator()(const ExprBoolean& expr) { @@ -229,7 +228,7 @@ public: } void operator()(const ExprGprEqual& expr) { - inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')'; + inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value); } const std::string& GetResult() const { -- cgit v1.2.3 From 09fa1d6a739b18f6a8f3d83065ff9aebd6e4bc8d Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 7 Dec 2020 16:30:36 -0500 Subject: video_core: Make use of ordered container contains() where applicable With C++20, we can use the more concise contains() member function instead of comparing the result of the find() call with the end iterator. 
--- src/video_core/buffer_cache/buffer_cache.h | 2 +- src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 4 +--- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 3 +-- src/video_core/shader/control_flow.cpp | 10 +++++----- src/video_core/shader/decode.cpp | 4 ++-- 8 files changed, 13 insertions(+), 16 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index e7edd733f..38961f3fd 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -545,7 +545,7 @@ private: bool IsRegionWritten(VAddr start, VAddr end) const { const u64 page_end = end >> WRITE_PAGE_BIT; for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { - if (written_pages.count(page_start) > 0) { + if (written_pages.contains(page_start)) { return true; } } diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index 78066cc63..3e4d88c30 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -1485,9 +1485,7 @@ void ARBDecompiler::Exit() { } const auto safe_get_register = [this](u32 reg) -> std::string { - // TODO(Rodrigo): Replace with contains once C++20 releases - const auto& used_registers = ir.GetRegisters(); - if (used_registers.find(reg) != used_registers.end()) { + if (ir.GetRegisters().contains(reg)) { return fmt::format("R{}.x", reg); } return "{0, 0, 0, 0}.x"; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0b96481f5..eabfdea5d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ 
b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -459,7 +459,7 @@ void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, const std::unordered_set& supported_formats) { - if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { + if (!supported_formats.contains(precompiled_entry.binary_format)) { LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); return {}; } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 70dd0c3c6..955b2abc4 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -343,7 +343,7 @@ void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { } const u64 id = entry.unique_identifier; - if (stored_transferable.find(id) != stored_transferable.end()) { + if (stored_transferable.contains(id)) { // The shader already exists return; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f8a1bcf34..970979fa1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -230,7 +230,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa if (!attribute.enabled) { continue; } - if (input_attributes.find(static_cast(index)) == input_attributes.end()) { + if (!input_attributes.contains(static_cast(index))) { // Skip attributes not used by the vertex shaders. 
continue; } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 5748eab3a..ca12b3793 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -2125,8 +2125,7 @@ private: OpStore(z_pointer, depth); } if (stage == ShaderType::Fragment) { - const auto SafeGetRegister = [&](u32 reg) { - // TODO(Rodrigo): Replace with contains once C++20 releases + const auto SafeGetRegister = [this](u32 reg) { if (const auto it = registers.find(reg); it != registers.end()) { return OpLoad(t_float, it->second); } diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 9120bf705..43d965f2f 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -257,7 +257,7 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) single_branch.ignore = false; break; } - if (state.registered.count(offset) != 0) { + if (state.registered.contains(offset)) { single_branch.address = offset; single_branch.ignore = true; break; @@ -632,12 +632,12 @@ void DecompileShader(CFGRebuildState& state) { for (auto label : state.labels) { state.manager->DeclareLabel(label); } - for (auto& block : state.block_info) { - if (state.labels.count(block.start) != 0) { + for (const auto& block : state.block_info) { + if (state.labels.contains(block.start)) { state.manager->InsertLabel(block.start); } const bool ignore = BlockBranchIsIgnored(block.branch); - u32 end = ignore ? block.end + 1 : block.end; + const u32 end = ignore ? 
block.end + 1 : block.end; state.manager->InsertBlock(block.start, end); if (!ignore) { InsertBranch(*state.manager, block.branch); @@ -737,7 +737,7 @@ std::unique_ptr ScanFlow(const ProgramCode& program_code, auto back = result_out->blocks.begin(); auto next = std::next(back); while (next != result_out->blocks.end()) { - if (state.labels.count(next->start) == 0 && next->start == back->end + 1) { + if (!state.labels.contains(next->start) && next->start == back->end + 1) { back->end = next->end; next = result_out->blocks.erase(next); continue; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index c8f4da6df..ab14c1aa3 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -153,8 +153,8 @@ void ShaderIR::Decode() { const auto& blocks = shader_info.blocks; NodeBlock current_block; u32 current_label = static_cast(exit_branch); - for (auto& block : blocks) { - if (shader_info.labels.count(block.start) != 0) { + for (const auto& block : blocks) { + if (shader_info.labels.contains(block.start)) { insert_block(current_block, current_label); current_block.clear(); current_label = block.start; -- cgit v1.2.3 From 5dbda226593f1f8934c811963990083a23b220bf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Dec 2020 21:01:09 -0300 Subject: vk_shader_decompiler: Silence warning when compiling without asserts --- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index ca12b3793..72954d0e3 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -2094,6 +2094,7 @@ private: return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2); default: UNREACHABLE(); + return v_true; } } -- cgit v1.2.3 From 1e191cc837cf9ff38e5d5566919be54654d185b9 
Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Dec 2020 21:01:23 -0300 Subject: video_core: Enforce C4715 (not all control paths return a value) Most of the time people write code that always returns a value, terminates execution, throws an exception, or uses an unconventional jump primitive. This is not always true when we build without asserts on mainline builds. To avoid introducing undefined behavior on our most used builds, enforce this warning as an error, stopping the build from shipping. --- src/video_core/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index a021d61f5..90f533730 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -303,6 +303,7 @@ if (MSVC) /we4457 # Declaration of 'identifier' hides function parameter /we4458 # Declaration of 'identifier' hides class member /we4459 # Declaration of 'identifier' hides global declaration + /we4715 # 'function' : not all control paths return a value ) else() target_compile_options(video_core PRIVATE -- cgit v1.2.3 From 1b9e08ab7821815a7c2023e16c575b24d37049ba Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Dec 2020 20:22:07 -0300 Subject: cmake: Always enable Vulkan Removes the unnecessary burden of maintaining separate #ifdef paths and allows us to share generic Vulkan code across APIs. 
--- src/video_core/CMakeLists.txt | 141 ++++++++++++++++++++---------------------- src/video_core/video_core.cpp | 4 -- 2 files changed, 66 insertions(+), 79 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index a021d61f5..26db24d0f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -115,6 +115,70 @@ add_library(video_core STATIC renderer_opengl/renderer_opengl.h renderer_opengl/utils.cpp renderer_opengl/utils.h + renderer_vulkan/fixed_pipeline_state.cpp + renderer_vulkan/fixed_pipeline_state.h + renderer_vulkan/maxwell_to_vk.cpp + renderer_vulkan/maxwell_to_vk.h + renderer_vulkan/nsight_aftermath_tracker.cpp + renderer_vulkan/nsight_aftermath_tracker.h + renderer_vulkan/renderer_vulkan.h + renderer_vulkan/renderer_vulkan.cpp + renderer_vulkan/vk_blit_screen.cpp + renderer_vulkan/vk_blit_screen.h + renderer_vulkan/vk_buffer_cache.cpp + renderer_vulkan/vk_buffer_cache.h + renderer_vulkan/vk_command_pool.cpp + renderer_vulkan/vk_command_pool.h + renderer_vulkan/vk_compute_pass.cpp + renderer_vulkan/vk_compute_pass.h + renderer_vulkan/vk_compute_pipeline.cpp + renderer_vulkan/vk_compute_pipeline.h + renderer_vulkan/vk_descriptor_pool.cpp + renderer_vulkan/vk_descriptor_pool.h + renderer_vulkan/vk_device.cpp + renderer_vulkan/vk_device.h + renderer_vulkan/vk_fence_manager.cpp + renderer_vulkan/vk_fence_manager.h + renderer_vulkan/vk_graphics_pipeline.cpp + renderer_vulkan/vk_graphics_pipeline.h + renderer_vulkan/vk_image.cpp + renderer_vulkan/vk_image.h + renderer_vulkan/vk_master_semaphore.cpp + renderer_vulkan/vk_master_semaphore.h + renderer_vulkan/vk_memory_manager.cpp + renderer_vulkan/vk_memory_manager.h + renderer_vulkan/vk_pipeline_cache.cpp + renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_query_cache.cpp + renderer_vulkan/vk_query_cache.h + renderer_vulkan/vk_rasterizer.cpp + renderer_vulkan/vk_rasterizer.h + renderer_vulkan/vk_renderpass_cache.cpp + 
renderer_vulkan/vk_renderpass_cache.h + renderer_vulkan/vk_resource_pool.cpp + renderer_vulkan/vk_resource_pool.h + renderer_vulkan/vk_sampler_cache.cpp + renderer_vulkan/vk_sampler_cache.h + renderer_vulkan/vk_scheduler.cpp + renderer_vulkan/vk_scheduler.h + renderer_vulkan/vk_shader_decompiler.cpp + renderer_vulkan/vk_shader_decompiler.h + renderer_vulkan/vk_shader_util.cpp + renderer_vulkan/vk_shader_util.h + renderer_vulkan/vk_staging_buffer_pool.cpp + renderer_vulkan/vk_staging_buffer_pool.h + renderer_vulkan/vk_state_tracker.cpp + renderer_vulkan/vk_state_tracker.h + renderer_vulkan/vk_stream_buffer.cpp + renderer_vulkan/vk_stream_buffer.h + renderer_vulkan/vk_swapchain.cpp + renderer_vulkan/vk_swapchain.h + renderer_vulkan/vk_texture_cache.cpp + renderer_vulkan/vk_texture_cache.h + renderer_vulkan/vk_update_descriptor.cpp + renderer_vulkan/vk_update_descriptor.h + renderer_vulkan/wrapper.cpp + renderer_vulkan/wrapper.h sampler_cache.cpp sampler_cache.h shader_cache.h @@ -194,75 +258,6 @@ add_library(video_core STATIC video_core.h ) -if (ENABLE_VULKAN) - target_sources(video_core PRIVATE - renderer_vulkan/fixed_pipeline_state.cpp - renderer_vulkan/fixed_pipeline_state.h - renderer_vulkan/maxwell_to_vk.cpp - renderer_vulkan/maxwell_to_vk.h - renderer_vulkan/nsight_aftermath_tracker.cpp - renderer_vulkan/nsight_aftermath_tracker.h - renderer_vulkan/renderer_vulkan.h - renderer_vulkan/renderer_vulkan.cpp - renderer_vulkan/vk_blit_screen.cpp - renderer_vulkan/vk_blit_screen.h - renderer_vulkan/vk_buffer_cache.cpp - renderer_vulkan/vk_buffer_cache.h - renderer_vulkan/vk_command_pool.cpp - renderer_vulkan/vk_command_pool.h - renderer_vulkan/vk_compute_pass.cpp - renderer_vulkan/vk_compute_pass.h - renderer_vulkan/vk_compute_pipeline.cpp - renderer_vulkan/vk_compute_pipeline.h - renderer_vulkan/vk_descriptor_pool.cpp - renderer_vulkan/vk_descriptor_pool.h - renderer_vulkan/vk_device.cpp - renderer_vulkan/vk_device.h - renderer_vulkan/vk_fence_manager.cpp - 
renderer_vulkan/vk_fence_manager.h - renderer_vulkan/vk_graphics_pipeline.cpp - renderer_vulkan/vk_graphics_pipeline.h - renderer_vulkan/vk_image.cpp - renderer_vulkan/vk_image.h - renderer_vulkan/vk_master_semaphore.cpp - renderer_vulkan/vk_master_semaphore.h - renderer_vulkan/vk_memory_manager.cpp - renderer_vulkan/vk_memory_manager.h - renderer_vulkan/vk_pipeline_cache.cpp - renderer_vulkan/vk_pipeline_cache.h - renderer_vulkan/vk_query_cache.cpp - renderer_vulkan/vk_query_cache.h - renderer_vulkan/vk_rasterizer.cpp - renderer_vulkan/vk_rasterizer.h - renderer_vulkan/vk_renderpass_cache.cpp - renderer_vulkan/vk_renderpass_cache.h - renderer_vulkan/vk_resource_pool.cpp - renderer_vulkan/vk_resource_pool.h - renderer_vulkan/vk_sampler_cache.cpp - renderer_vulkan/vk_sampler_cache.h - renderer_vulkan/vk_scheduler.cpp - renderer_vulkan/vk_scheduler.h - renderer_vulkan/vk_shader_decompiler.cpp - renderer_vulkan/vk_shader_decompiler.h - renderer_vulkan/vk_shader_util.cpp - renderer_vulkan/vk_shader_util.h - renderer_vulkan/vk_staging_buffer_pool.cpp - renderer_vulkan/vk_staging_buffer_pool.h - renderer_vulkan/vk_state_tracker.cpp - renderer_vulkan/vk_state_tracker.h - renderer_vulkan/vk_stream_buffer.cpp - renderer_vulkan/vk_stream_buffer.h - renderer_vulkan/vk_swapchain.cpp - renderer_vulkan/vk_swapchain.h - renderer_vulkan/vk_texture_cache.cpp - renderer_vulkan/vk_texture_cache.h - renderer_vulkan/vk_update_descriptor.cpp - renderer_vulkan/vk_update_descriptor.h - renderer_vulkan/wrapper.cpp - renderer_vulkan/wrapper.h - ) -endif() - create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) @@ -278,12 +273,8 @@ endif() add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) - -if (ENABLE_VULKAN) - target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) - target_compile_definitions(video_core PRIVATE HAS_VULKAN) - 
target_link_libraries(video_core PRIVATE sirit) -endif() +target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) +target_link_libraries(video_core PRIVATE sirit) if (ENABLE_NSIGHT_AFTERMATH) if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK}) diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index dd5cee4a1..837800bfe 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -11,9 +11,7 @@ #include "video_core/gpu_synch.h" #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/renderer_opengl.h" -#ifdef HAS_VULKAN #include "video_core/renderer_vulkan/renderer_vulkan.h" -#endif #include "video_core/video_core.h" namespace { @@ -28,11 +26,9 @@ std::unique_ptr CreateRenderer( case Settings::RendererBackend::OpenGL: return std::make_unique(telemetry_session, emu_window, cpu_memory, gpu, std::move(context)); -#ifdef HAS_VULKAN case Settings::RendererBackend::Vulkan: return std::make_unique(telemetry_session, emu_window, cpu_memory, gpu, std::move(context)); -#endif default: return nullptr; } -- cgit v1.2.3 From 661483f313a7dbbbebd4f233acecb06b6505a636 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Dec 2020 23:34:38 -0300 Subject: renderer_vulkan/fixed_pipeline_state: Move enabled bindings to static state Without using VK_EXT_robustness2, we can't consider the 'enabled' (not null) vertex buffers as dynamic state, as this leads to invalid Vulkan state. Move this to static state that is always hashed and compared in the pipeline key. The bits for enabled vertex buffers are moved into the attribute state bitfield. This is not 'correct' as it's not an attribute state, but that struct has bits to spare, and it's used in an array of 32 elements (the exact same number of vertex buffer bindings). 
--- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 14 +++++--------- src/video_core/renderer_vulkan/fixed_pipeline_state.h | 11 ++++------- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 13 +++---------- 3 files changed, 12 insertions(+), 26 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 5ec43db11..08662f4a8 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -75,7 +75,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0; } - for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { const auto& input = regs.vertex_attrib_format[index]; auto& attribute = attributes[index]; attribute.raw = 0; @@ -84,6 +84,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta attribute.offset.Assign(input.offset); attribute.type.Assign(static_cast(input.type.Value())); attribute.size.Assign(static_cast(input.size.Value())); + attribute.binding_index_enabled.Assign(regs.vertex_array[index].IsEnabled() ? 1 : 0); } for (std::size_t index = 0; index < std::size(attachments); ++index) { @@ -171,14 +172,9 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); cull_face.Assign(PackCullFace(regs.cull_face)); cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); - - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const auto& input = regs.vertex_array[index]; - VertexBinding& binding = vertex_bindings[index]; - binding.raw = 0; - binding.enabled.Assign(input.IsEnabled() ? 
1 : 0); - binding.stride.Assign(static_cast(input.stride.Value())); - } + std::ranges::transform(regs.vertex_array, vertex_strides.begin(), [](const auto& array) { + return static_cast(array.stride.Value()); + }); } std::size_t FixedPipelineState::Hash() const noexcept { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index c26b77790..9b18301c1 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -96,6 +96,8 @@ struct FixedPipelineState { BitField<6, 14, u32> offset; BitField<20, 3, u32> type; BitField<23, 6, u32> size; + // Not really an element of a vertex attribute, but it can be packed here + BitField<29, 1, u32> binding_index_enabled; constexpr Maxwell::VertexAttribute::Type Type() const noexcept { return static_cast(type.Value()); @@ -130,12 +132,6 @@ struct FixedPipelineState { } }; - union VertexBinding { - u16 raw; - BitField<0, 12, u16> stride; - BitField<12, 1, u16> enabled; - }; - struct DynamicState { union { u32 raw1; @@ -153,7 +149,8 @@ struct FixedPipelineState { BitField<0, 2, u32> cull_face; BitField<2, 1, u32> cull_enable; }; - std::array vertex_bindings; + // Vertex stride is a 12 bits value, we have 4 bits to spare per element + std::array vertex_strides; void Fill(const Maxwell& regs); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 970979fa1..a1a217b7c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -190,11 +190,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa // state is ignored dynamic.raw1 = 0; dynamic.raw2 = 0; - for (FixedPipelineState::VertexBinding& binding : dynamic.vertex_bindings) { - // Enable all vertex bindings - binding.raw = 0; - binding.enabled.Assign(1); - } + 
dynamic.vertex_strides.fill(0); } else { dynamic = state.dynamic_state; } @@ -202,19 +198,16 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa std::vector vertex_bindings; std::vector vertex_binding_divisors; for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const auto& binding = dynamic.vertex_bindings[index]; - if (!binding.enabled) { + if (state.attributes[index].binding_index_enabled == 0) { continue; } const bool instanced = state.binding_divisors[index] != 0; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ .binding = static_cast(index), - .stride = binding.stride, + .stride = dynamic.vertex_strides[index], .inputRate = rate, }); - if (instanced) { vertex_binding_divisors.push_back({ .binding = static_cast(index), -- cgit v1.2.3 From 14c825bd1c37b2444e858bf1a75fb77455b4eb52 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 11 Dec 2020 22:26:14 -0800 Subject: video_core: gpu: Refactor out synchronous/asynchronous GPU implementations. - We must always use a GPU thread now, even with synchronous GPU. 
--- src/video_core/CMakeLists.txt | 5 +-- src/video_core/framebuffer_config.h | 31 +++++++++++++ src/video_core/gpu.cpp | 76 +++++++++++++++++++++++++++++--- src/video_core/gpu.h | 55 ++++++++---------------- src/video_core/gpu_asynch.cpp | 86 ------------------------------------- src/video_core/gpu_asynch.h | 47 -------------------- src/video_core/gpu_synch.cpp | 61 -------------------------- src/video_core/gpu_synch.h | 41 ------------------ src/video_core/gpu_thread.h | 7 ++- src/video_core/video_core.cpp | 10 +---- 10 files changed, 130 insertions(+), 289 deletions(-) create mode 100644 src/video_core/framebuffer_config.h delete mode 100644 src/video_core/gpu_asynch.cpp delete mode 100644 src/video_core/gpu_asynch.h delete mode 100644 src/video_core/gpu_synch.cpp delete mode 100644 src/video_core/gpu_synch.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 5b73724ce..4111ce8f7 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -47,6 +47,7 @@ add_library(video_core STATIC engines/shader_bytecode.h engines/shader_header.h engines/shader_type.h + framebuffer_config.h macro/macro.cpp macro/macro.h macro/macro_hle.cpp @@ -58,10 +59,6 @@ add_library(video_core STATIC fence_manager.h gpu.cpp gpu.h - gpu_asynch.cpp - gpu_asynch.h - gpu_synch.cpp - gpu_synch.h gpu_thread.cpp gpu_thread.h guest_driver.cpp diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h new file mode 100644 index 000000000..b86c3a757 --- /dev/null +++ b/src/video_core/framebuffer_config.h @@ -0,0 +1,31 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +namespace Tegra { + +/** + * Struct describing framebuffer configuration + */ +struct FramebufferConfig { + enum class PixelFormat : u32 { + A8B8G8R8_UNORM = 1, + RGB565_UNORM = 4, + B8G8R8A8_UNORM = 5, + }; + + VAddr address{}; + u32 offset{}; + u32 width{}; + u32 height{}; + u32 stride{}; + PixelFormat pixel_format{}; + + using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; + TransformFlags transform_flags{}; + Common::Rectangle crop_rect; +}; + +} // namespace Tegra diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index e2512a7f2..f99a8a0de 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -10,6 +10,7 @@ #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/frontend/emu_window.h" +#include "core/hardware_interrupt_manager.h" #include "core/memory.h" #include "core/settings.h" #include "video_core/engines/fermi_2d.h" @@ -36,7 +37,8 @@ GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) kepler_compute{std::make_unique(system, *memory_manager)}, maxwell_dma{std::make_unique(system, *memory_manager)}, kepler_memory{std::make_unique(system, *memory_manager)}, - shader_notify{std::make_unique()}, is_async{is_async_} {} + shader_notify{std::make_unique()}, is_async{is_async_}, + gpu_thread{system_} {} GPU::~GPU() = default; @@ -198,10 +200,6 @@ void GPU::SyncGuestHost() { renderer->Rasterizer().SyncGuestHost(); } -void GPU::OnCommandListEnd() { - renderer->Rasterizer().ReleaseFences(); -} - enum class GpuSemaphoreOperation { AcquireEqual = 0x1, WriteLong = 0x2, @@ -461,4 +459,72 @@ void GPU::ProcessSemaphoreAcquire() { } } +void GPU::Start() { + gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher); + cpu_context = renderer->GetRenderWindow().CreateSharedContext(); + cpu_context->MakeCurrent(); +} + +void GPU::ObtainContext() { + cpu_context->MakeCurrent(); +} + +void GPU::ReleaseContext() { + cpu_context->DoneCurrent(); +} + 
+void GPU::PushGPUEntries(Tegra::CommandList&& entries) { + gpu_thread.SubmitList(std::move(entries)); +} + +void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { + if (!use_nvdec) { + return; + } + // This condition fires when a video stream ends, clear all intermediary data + if (entries[0].raw == 0xDEADB33F) { + cdma_pusher.reset(); + return; + } + if (!cdma_pusher) { + cdma_pusher = std::make_unique(*this); + } + + // SubmitCommandBuffer would make the nvdec operations async, this is not currently working + // TODO(ameerj): RE proper async nvdec operation + // gpu_thread.SubmitCommandBuffer(std::move(entries)); + + cdma_pusher->Push(std::move(entries)); + cdma_pusher->DispatchCalls(); +} + +void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { + gpu_thread.SwapBuffers(framebuffer); +} + +void GPU::FlushRegion(VAddr addr, u64 size) { + gpu_thread.FlushRegion(addr, size); +} + +void GPU::InvalidateRegion(VAddr addr, u64 size) { + gpu_thread.InvalidateRegion(addr, size); +} + +void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { + gpu_thread.FlushAndInvalidateRegion(addr, size); +} + +void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { + auto& interrupt_manager = system.InterruptManager(); + interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); +} + +void GPU::WaitIdle() const { + gpu_thread.WaitIdle(); +} + +void GPU::OnCommandListEnd() { + gpu_thread.OnCommandListEnd(); +} + } // namespace Tegra diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 660641d04..a2bb4d82d 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -15,6 +15,8 @@ #include "core/hle/service/nvflinger/buffer_queue.h" #include "video_core/cdma_pusher.h" #include "video_core/dma_pusher.h" +#include "video_core/framebuffer_config.h" +#include "video_core/gpu_thread.h" using CacheAddr = std::uintptr_t; [[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { @@ -101,28 +103,6 @@ enum class DepthFormat 
: u32 { struct CommandListHeader; class DebugContext; -/** - * Struct describing framebuffer configuration - */ -struct FramebufferConfig { - enum class PixelFormat : u32 { - A8B8G8R8_UNORM = 1, - RGB565_UNORM = 4, - B8G8R8A8_UNORM = 5, - }; - - VAddr address; - u32 offset; - u32 width; - u32 height; - u32 stride; - PixelFormat pixel_format; - - using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; - TransformFlags transform_flags; - Common::Rectangle crop_rect; -}; - namespace Engines { class Fermi2D; class Maxwell3D; @@ -141,7 +121,7 @@ enum class EngineID { class MemoryManager; -class GPU { +class GPU final { public: struct MethodCall { u32 method{}; @@ -159,7 +139,7 @@ public: }; explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_); - virtual ~GPU(); + ~GPU(); /// Binds a renderer to the GPU. void BindRenderer(std::unique_ptr renderer); @@ -176,7 +156,7 @@ public: /// Synchronizes CPU writes with Host GPU memory. void SyncGuestHost(); /// Signal the ending of command list. - virtual void OnCommandListEnd(); + void OnCommandListEnd(); /// Request a host GPU memory flush from the CPU. [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); @@ -240,7 +220,7 @@ public: } // Waits for the GPU to finish working - virtual void WaitIdle() const = 0; + void WaitIdle() const; /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. void WaitFence(u32 syncpoint_id, u32 value); @@ -330,34 +310,34 @@ public: /// Performs any additional setup necessary in order to begin GPU emulation. /// This can be used to launch any necessary threads and register any necessary /// core timing events. 
- virtual void Start() = 0; + void Start(); /// Obtain the CPU Context - virtual void ObtainContext() = 0; + void ObtainContext(); /// Release the CPU Context - virtual void ReleaseContext() = 0; + void ReleaseContext(); /// Push GPU command entries to be processed - virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; + void PushGPUEntries(Tegra::CommandList&& entries); /// Push GPU command buffer entries to be processed - virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0; + void PushCommandBuffer(Tegra::ChCommandHeaderList& entries); /// Swap buffers (render frame) - virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; + void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - virtual void FlushRegion(VAddr addr, u64 size) = 0; + void FlushRegion(VAddr addr, u64 size); /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(VAddr addr, u64 size) = 0; + void InvalidateRegion(VAddr addr, u64 size); /// Notify rasterizer that any caches of the specified region should be flushed and invalidated - virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; + void FlushAndInvalidateRegion(VAddr addr, u64 size); protected: - virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; + void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const; private: void ProcessBindMethod(const MethodCall& method_call); @@ -426,6 +406,9 @@ private: u64 last_flush_fence{}; std::mutex flush_request_mutex; + VideoCommon::GPUThread::ThreadManager gpu_thread; + std::unique_ptr cpu_context; + const bool is_async; }; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp deleted file mode 100644 index 6cc091ecd..000000000 --- a/src/video_core/gpu_asynch.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// 
Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "core/core.h" -#include "core/hardware_interrupt_manager.h" -#include "video_core/gpu_asynch.h" -#include "video_core/gpu_thread.h" -#include "video_core/renderer_base.h" - -namespace VideoCommon { - -GPUAsynch::GPUAsynch(Core::System& system_, bool use_nvdec_) - : GPU{system_, true, use_nvdec_}, gpu_thread{system_} {} - -GPUAsynch::~GPUAsynch() = default; - -void GPUAsynch::Start() { - gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher); - cpu_context = renderer->GetRenderWindow().CreateSharedContext(); - cpu_context->MakeCurrent(); -} - -void GPUAsynch::ObtainContext() { - cpu_context->MakeCurrent(); -} - -void GPUAsynch::ReleaseContext() { - cpu_context->DoneCurrent(); -} - -void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { - gpu_thread.SubmitList(std::move(entries)); -} - -void GPUAsynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { - if (!use_nvdec) { - return; - } - // This condition fires when a video stream ends, clear all intermediary data - if (entries[0].raw == 0xDEADB33F) { - cdma_pusher.reset(); - return; - } - if (!cdma_pusher) { - cdma_pusher = std::make_unique(*this); - } - - // SubmitCommandBuffer would make the nvdec operations async, this is not currently working - // TODO(ameerj): RE proper async nvdec operation - // gpu_thread.SubmitCommandBuffer(std::move(entries)); - - cdma_pusher->Push(std::move(entries)); - cdma_pusher->DispatchCalls(); -} - -void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - gpu_thread.SwapBuffers(framebuffer); -} - -void GPUAsynch::FlushRegion(VAddr addr, u64 size) { - gpu_thread.FlushRegion(addr, size); -} - -void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { - gpu_thread.InvalidateRegion(addr, size); -} - -void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { - gpu_thread.FlushAndInvalidateRegion(addr, size); -} - -void 
GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { - auto& interrupt_manager = system.InterruptManager(); - interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); -} - -void GPUAsynch::WaitIdle() const { - gpu_thread.WaitIdle(); -} - -void GPUAsynch::OnCommandListEnd() { - gpu_thread.OnCommandListEnd(); -} - -} // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h deleted file mode 100644 index a384113f4..000000000 --- a/src/video_core/gpu_asynch.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "video_core/gpu.h" -#include "video_core/gpu_thread.h" - -namespace Core::Frontend { -class GraphicsContext; -} - -namespace VideoCore { -class RendererBase; -} // namespace VideoCore - -namespace VideoCommon { - -/// Implementation of GPU interface that runs the GPU asynchronously -class GPUAsynch final : public Tegra::GPU { -public: - explicit GPUAsynch(Core::System& system_, bool use_nvdec_); - ~GPUAsynch() override; - - void Start() override; - void ObtainContext() override; - void ReleaseContext() override; - void PushGPUEntries(Tegra::CommandList&& entries) override; - void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override; - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void FlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; - void WaitIdle() const override; - - void OnCommandListEnd() override; - -protected: - void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; - -private: - GPUThread::ThreadManager gpu_thread; - std::unique_ptr cpu_context; -}; - -} // namespace VideoCommon diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp deleted file mode 100644 index 
1e9d4b9b2..000000000 --- a/src/video_core/gpu_synch.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/gpu_synch.h" -#include "video_core/renderer_base.h" - -namespace VideoCommon { - -GPUSynch::GPUSynch(Core::System& system_, bool use_nvdec_) : GPU{system_, false, use_nvdec_} {} - -GPUSynch::~GPUSynch() = default; - -void GPUSynch::Start() {} - -void GPUSynch::ObtainContext() { - renderer->Context().MakeCurrent(); -} - -void GPUSynch::ReleaseContext() { - renderer->Context().DoneCurrent(); -} - -void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { - dma_pusher->Push(std::move(entries)); - dma_pusher->DispatchCalls(); -} - -void GPUSynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { - if (!use_nvdec) { - return; - } - // This condition fires when a video stream ends, clears all intermediary data - if (entries[0].raw == 0xDEADB33F) { - cdma_pusher.reset(); - return; - } - if (!cdma_pusher) { - cdma_pusher = std::make_unique(*this); - } - cdma_pusher->Push(std::move(entries)); - cdma_pusher->DispatchCalls(); -} - -void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - renderer->SwapBuffers(framebuffer); -} - -void GPUSynch::FlushRegion(VAddr addr, u64 size) { - renderer->Rasterizer().FlushRegion(addr, size); -} - -void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { - renderer->Rasterizer().InvalidateRegion(addr, size); -} - -void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { - renderer->Rasterizer().FlushAndInvalidateRegion(addr, size); -} - -} // namespace VideoCommon diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h deleted file mode 100644 index c5904b8db..000000000 --- a/src/video_core/gpu_synch.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file 
included. - -#pragma once - -#include "video_core/gpu.h" - -namespace Core::Frontend { -class GraphicsContext; -} - -namespace VideoCore { -class RendererBase; -} // namespace VideoCore - -namespace VideoCommon { - -/// Implementation of GPU interface that runs the GPU synchronously -class GPUSynch final : public Tegra::GPU { -public: - explicit GPUSynch(Core::System& system_, bool use_nvdec_); - ~GPUSynch() override; - - void Start() override; - void ObtainContext() override; - void ReleaseContext() override; - void PushGPUEntries(Tegra::CommandList&& entries) override; - void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override; - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void FlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; - void WaitIdle() const override {} - -protected: - void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, - [[maybe_unused]] u32 value) const override {} -}; - -} // namespace VideoCommon diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index f1c52cd9e..0071195d6 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -10,8 +10,9 @@ #include #include #include + #include "common/threadsafe_queue.h" -#include "video_core/gpu.h" +#include "video_core/framebuffer_config.h" namespace Tegra { struct FramebufferConfig; @@ -25,6 +26,10 @@ class GraphicsContext; class System; } // namespace Core +namespace VideoCore { + class RendererBase; +} // namespace VideoCore + namespace VideoCommon::GPUThread { /// Command to signal to the GPU thread that processing has ended diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 837800bfe..53444e945 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -7,8 +7,6 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/settings.h" 
-#include "video_core/gpu_asynch.h" -#include "video_core/gpu_synch.h" #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -39,13 +37,9 @@ std::unique_ptr CreateRenderer( namespace VideoCore { std::unique_ptr CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { - std::unique_ptr gpu; const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); - if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { - gpu = std::make_unique(system, use_nvdec); - } else { - gpu = std::make_unique(system, use_nvdec); - } + std::unique_ptr gpu = std::make_unique( + system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec); auto context = emu_window.CreateSharedContext(); const auto scope = context->Acquire(); -- cgit v1.2.3 From 40571c073faa02a6a4301e7f0ce365ef50a400aa Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 12 Dec 2020 00:24:33 -0800 Subject: video_core: gpu: Implement synchronous mode using threaded GPU. 
--- src/video_core/gpu.cpp | 7 +++++-- src/video_core/gpu.h | 4 ++-- src/video_core/gpu_thread.cpp | 30 ++++++++++++++++++++++++------ src/video_core/gpu_thread.h | 5 +++-- 4 files changed, 34 insertions(+), 12 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index f99a8a0de..6ab06775f 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -38,7 +38,7 @@ GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) maxwell_dma{std::make_unique(system, *memory_manager)}, kepler_memory{std::make_unique(system, *memory_manager)}, shader_notify{std::make_unique()}, is_async{is_async_}, - gpu_thread{system_} {} + gpu_thread{system_, is_async_} {} GPU::~GPU() = default; @@ -524,7 +524,10 @@ void GPU::WaitIdle() const { } void GPU::OnCommandListEnd() { - gpu_thread.OnCommandListEnd(); + if (is_async) { + // This command only applies to asynchronous GPU mode + gpu_thread.OnCommandListEnd(); + } } } // namespace Tegra diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index a2bb4d82d..d81e38680 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -406,10 +406,10 @@ private: u64 last_flush_fence{}; std::mutex flush_request_mutex; + const bool is_async; + VideoCommon::GPUThread::ThreadManager gpu_thread; std::unique_ptr cpu_context; - - const bool is_async; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index e27218b96..56b9621b1 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -65,7 +65,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, } } -ThreadManager::ThreadManager(Core::System& system_) : system{system_} {} +ThreadManager::ThreadManager(Core::System& system_, bool is_async_) + : system{system_}, is_async{is_async_} {} ThreadManager::~ThreadManager() { if (!thread.joinable()) { @@ -97,19 +98,30 @@ void ThreadManager::SwapBuffers(const 
Tegra::FramebufferConfig* framebuffer) { } void ThreadManager::FlushRegion(VAddr addr, u64 size) { - if (!Settings::IsGPULevelHigh()) { + if (!is_async) { + // Always flush with synchronous GPU mode PushCommand(FlushRegionCommand(addr, size)); return; } - if (!Settings::IsGPULevelExtreme()) { - return; - } - if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) { + + // Asynchronous GPU mode + switch (Settings::values.gpu_accuracy.GetValue()) { + case Settings::GPUAccuracy::Normal: + PushCommand(FlushRegionCommand(addr, size)); + break; + case Settings::GPUAccuracy::High: + // TODO(bunnei): Is this right? Preserving existing behavior for now + break; + case Settings::GPUAccuracy::Extreme: { auto& gpu = system.GPU(); u64 fence = gpu.RequestFlush(addr, size); PushCommand(GPUTickCommand()); while (fence > gpu.CurrentFlushRequestFence()) { } + break; + } + default: + UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue()); } } @@ -134,6 +146,12 @@ void ThreadManager::OnCommandListEnd() { u64 ThreadManager::PushCommand(CommandData&& command_data) { const u64 fence{++state.last_fence}; state.queue.Push(CommandDataContainer(std::move(command_data), fence)); + + if (!is_async) { + // In synchronous GPU mode, block the caller until the command has executed + WaitIdle(); + } + return fence; } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 0071195d6..2775629e7 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -27,7 +27,7 @@ class System; } // namespace Core namespace VideoCore { - class RendererBase; +class RendererBase; } // namespace VideoCore namespace VideoCommon::GPUThread { @@ -117,7 +117,7 @@ struct SynchState final { /// Class used to manage the GPU thread class ThreadManager final { public: - explicit ThreadManager(Core::System& system_); + explicit ThreadManager(Core::System& system_, bool is_async_); ~ThreadManager(); /// Creates and starts the GPU thread. 
@@ -155,6 +155,7 @@ private: Core::System& system; std::thread thread; std::thread::id thread_id; + const bool is_async; }; } // namespace VideoCommon::GPUThread -- cgit v1.2.3 From 4991620f899ce21bcde1e57f585fee4081e053d0 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 12 Dec 2020 01:37:06 -0800 Subject: video_core: gpu_thread: Do not wait when system is powered down. --- src/video_core/gpu_thread.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 56b9621b1..1e95d80c3 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -135,7 +135,8 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { } void ThreadManager::WaitIdle() const { - while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { + while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) && + system.IsPoweredOn()) { } } -- cgit v1.2.3 From 954341763a3d8e0b9734fc2234368c40d65bace4 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 24 Dec 2020 23:28:46 -0800 Subject: gpu: gpu_thread: Ensure MicroProfile is shutdown on exit. 
--- src/video_core/gpu_thread.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 1e95d80c3..7e490bcc3 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/microprofile.h" +#include "common/scope_exit.h" #include "common/thread.h" #include "core/core.h" #include "core/frontend/emu_window.h" @@ -21,6 +22,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, SynchState& state, Tegra::CDmaPusher& cdma_pusher) { std::string name = "yuzu:GPU"; MicroProfileOnThreadCreate(name.c_str()); + SCOPE_EXIT({ MicroProfileOnThreadExit(); }); + Common::SetCurrentThreadName(name.c_str()); Common::SetCurrentThreadPriority(Common::ThreadPriority::High); system.RegisterHostThread(); -- cgit v1.2.3 From 95d156a1502ca46b8301e1527ad14db9790495e7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:29:04 -0300 Subject: video_core/host_shaders: Add support for prebuilt SPIR-V shaders Add support for building SPIR-V shaders from GLSL and generating headers to include the text of those same GLSL shaders to consume from OpenGL. 
--- src/video_core/host_shaders/CMakeLists.txt | 53 +++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 16 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index c157724a9..ff20bc93b 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,8 +1,12 @@ -set(SHADER_SOURCES +set(SHADER_FILES opengl_present.frag opengl_present.vert ) +find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) + +set(GLSL_FLAGS "") + set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders) set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) @@ -10,27 +14,44 @@ set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in) set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) -foreach(FILENAME IN ITEMS ${SHADER_SOURCES}) +foreach(FILENAME IN ITEMS ${SHADER_FILES}) string(REPLACE "." 
"_" SHADER_NAME ${FILENAME}) set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) - set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) - add_custom_command( - OUTPUT - ${HEADER_FILE} - COMMAND - ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE} - MAIN_DEPENDENCY - ${SOURCE_FILE} - DEPENDS - ${INPUT_FILE} - # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified - ) - set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE}) + # Skip generating source headers on Vulkan exclusive files + if (NOT ${FILENAME} MATCHES "vulkan.*") + set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) + add_custom_command( + OUTPUT + ${SOURCE_HEADER_FILE} + COMMAND + ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} + MAIN_DEPENDENCY + ${SOURCE_FILE} + DEPENDS + ${INPUT_FILE} + # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified + ) + set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) + endif() + # Skip compiling to SPIR-V OpenGL exclusive files + if (NOT ${FILENAME} MATCHES "opengl.*") + string(TOUPPER ${SHADER_NAME}_SPV SPIRV_VARIABLE_NAME) + set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_spv.h) + add_custom_command( + OUTPUT + ${SPIRV_HEADER_FILE} + COMMAND + ${GLSLANGVALIDATOR} -V ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} + MAIN_DEPENDENCY + ${SOURCE_FILE} + ) + set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE}) + endif() endforeach() add_custom_target(host_shaders DEPENDS ${SHADER_HEADERS} SOURCES - ${SHADER_SOURCES} + ${SHADER_FILES} ) -- cgit v1.2.3 From f20e18f60d1411e54d1d85615db7edefda93e265 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:35:56 -0300 Subject: host_shaders: Add copyright headers to OpenGL present shaders --- src/video_core/host_shaders/opengl_present.frag | 4 ++++ src/video_core/host_shaders/opengl_present.vert | 4 ++++ 2 
files changed, 8 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag index 8a4cb024b..84b818227 100644 --- a/src/video_core/host_shaders/opengl_present.frag +++ b/src/video_core/host_shaders/opengl_present.frag @@ -1,3 +1,7 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + #version 430 core layout (location = 0) in vec2 frag_tex_coord; diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert index 2235d31a4..c3b5adbba 100644 --- a/src/video_core/host_shaders/opengl_present.vert +++ b/src/video_core/host_shaders/opengl_present.vert @@ -1,3 +1,7 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + #version 430 core out gl_PerVertex { -- cgit v1.2.3 From 12d16248dd61d2998ee271e16d6980d6df963bbe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:39:05 -0300 Subject: host_shaders: Add block linear upload compute shaders --- src/video_core/host_shaders/CMakeLists.txt | 2 + .../host_shaders/block_linear_unswizzle_2d.comp | 122 ++++++++++++++++++++ .../host_shaders/block_linear_unswizzle_3d.comp | 125 +++++++++++++++++++++ 3 files changed, 249 insertions(+) create mode 100644 src/video_core/host_shaders/block_linear_unswizzle_2d.comp create mode 100644 src/video_core/host_shaders/block_linear_unswizzle_3d.comp (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index ff20bc93b..7feb6df99 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,4 +1,6 @@ set(SHADER_FILES + block_linear_unswizzle_2d.comp + block_linear_unswizzle_3d.comp opengl_present.frag opengl_present.vert ) diff --git 
a/src/video_core/host_shaders/block_linear_unswizzle_2d.comp b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp new file mode 100644 index 000000000..a131be79e --- /dev/null +++ b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp @@ -0,0 +1,122 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 430 + +#ifdef VULKAN + +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 2 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uvec3 origin; +UNIFORM(1) ivec3 destination; +UNIFORM(2) uint bytes_per_block_log2; +UNIFORM(3) uint layer_stride; +UNIFORM(4) uint block_size; +UNIFORM(5) uint x_shift; +UNIFORM(6) uint block_height; +UNIFORM(7) uint block_height_mask; +END_PUSH_CONSTANTS + +layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { + uint swizzle_table[]; +}; + +#if HAS_EXTENDED_TYPES +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; +#endif +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; 
+layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; }; + +layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage2DArray output_image; + +layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; + +const uint GOB_SIZE_X = 64; +const uint GOB_SIZE_Y = 8; +const uint GOB_SIZE_Z = 1; +const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; + +const uint GOB_SIZE_X_SHIFT = 6; +const uint GOB_SIZE_Y_SHIFT = 3; +const uint GOB_SIZE_Z_SHIFT = 0; +const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; + +const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); + +uint SwizzleOffset(uvec2 pos) { + pos = pos & SWIZZLE_MASK; + return swizzle_table[pos.y * 64 + pos.x]; +} + +uvec4 ReadTexel(uint offset) { + switch (bytes_per_block_log2) { +#if HAS_EXTENDED_TYPES + case 0: + return uvec4(u8data[offset], 0, 0, 0); + case 1: + return uvec4(u16data[offset / 2], 0, 0, 0); +#else + case 0: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); + case 1: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); +#endif + case 2: + return uvec4(u32data[offset / 4], 0, 0, 0); + case 3: + return uvec4(u64data[offset / 8], 0, 0); + case 4: + return u128data[offset / 16]; + } + return uvec4(0); +} + +void main() { + uvec3 pos = gl_GlobalInvocationID + origin; + pos.x <<= bytes_per_block_log2; + + // Read as soon as possible due to its latency + const uint swizzle = SwizzleOffset(pos.xy); + + const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; + + uint offset = 0; + offset += pos.z * layer_stride; + offset += (block_y >> block_height) * block_size; + offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; + offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; + offset += swizzle; + + const uvec4 texel = ReadTexel(offset); + const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; + imageStore(output_image, coord, texel); 
+} diff --git a/src/video_core/host_shaders/block_linear_unswizzle_3d.comp b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp new file mode 100644 index 000000000..bb6872e6b --- /dev/null +++ b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp @@ -0,0 +1,125 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 430 + +#ifdef VULKAN + +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 2 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uvec3 origin; +UNIFORM(1) ivec3 destination; +UNIFORM(2) uint bytes_per_block_log2; +UNIFORM(3) uint slice_size; +UNIFORM(4) uint block_size; +UNIFORM(5) uint x_shift; +UNIFORM(6) uint block_height; +UNIFORM(7) uint block_height_mask; +UNIFORM(8) uint block_depth; +UNIFORM(9) uint block_depth_mask; +END_PUSH_CONSTANTS + +layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { + uint swizzle_table[]; +}; + +#if HAS_EXTENDED_TYPES +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; +#endif +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; +layout(binding 
= BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; }; + +layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage3D output_image; + +layout(local_size_x = 16, local_size_y = 8, local_size_z = 8) in; + +const uint GOB_SIZE_X = 64; +const uint GOB_SIZE_Y = 8; +const uint GOB_SIZE_Z = 1; +const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; + +const uint GOB_SIZE_X_SHIFT = 6; +const uint GOB_SIZE_Y_SHIFT = 3; +const uint GOB_SIZE_Z_SHIFT = 0; +const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; + +const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); + +uint SwizzleOffset(uvec2 pos) { + pos = pos & SWIZZLE_MASK; + return swizzle_table[pos.y * 64 + pos.x]; +} + +uvec4 ReadTexel(uint offset) { + switch (bytes_per_block_log2) { +#if HAS_EXTENDED_TYPES + case 0: + return uvec4(u8data[offset], 0, 0, 0); + case 1: + return uvec4(u16data[offset / 2], 0, 0, 0); +#else + case 0: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); + case 1: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); +#endif + case 2: + return uvec4(u32data[offset / 4], 0, 0, 0); + case 3: + return uvec4(u64data[offset / 8], 0, 0); + case 4: + return u128data[offset / 16]; + } + return uvec4(0); +} + +void main() { + uvec3 pos = gl_GlobalInvocationID + origin; + pos.x <<= bytes_per_block_log2; + + // Read as soon as possible due to its latency + const uint swizzle = SwizzleOffset(pos.xy); + + const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; + + uint offset = 0; + offset += (pos.z >> block_depth) * slice_size; + offset += (pos.z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height); + offset += (block_y >> block_height) * block_size; + offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; + offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; + offset += 
swizzle; + + const uvec4 texel = ReadTexel(offset); + const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; + imageStore(output_image, coord, texel); +} -- cgit v1.2.3 From 59c46f9de94d3eab3aec3ff2abc74bd3aa8a056c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:41:42 -0300 Subject: host_shaders: Add pitch linear upload compute shader --- src/video_core/host_shaders/CMakeLists.txt | 1 + src/video_core/host_shaders/pitch_unswizzle.comp | 86 ++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 src/video_core/host_shaders/pitch_unswizzle.comp (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 7feb6df99..1983e7dc9 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -3,6 +3,7 @@ set(SHADER_FILES block_linear_unswizzle_3d.comp opengl_present.frag opengl_present.vert + pitch_unswizzle.comp ) find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) diff --git a/src/video_core/host_shaders/pitch_unswizzle.comp b/src/video_core/host_shaders/pitch_unswizzle.comp new file mode 100644 index 000000000..cb48ec170 --- /dev/null +++ b/src/video_core/host_shaders/pitch_unswizzle.comp @@ -0,0 +1,86 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 430 + +#ifdef VULKAN + +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_INPUT_BUFFER 0 +#define BINDING_OUTPUT_IMAGE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#define BINDING_INPUT_BUFFER 0 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uvec2 origin; +UNIFORM(1) ivec2 destination; +UNIFORM(2) uint bytes_per_block; +UNIFORM(3) uint pitch; +END_PUSH_CONSTANTS + +#if HAS_EXTENDED_TYPES +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU8 { uint8_t u8data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU16 { uint16_t u16data[]; }; +#endif +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uint u32data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU64 { uvec2 u64data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU128 { uvec4 u128data[]; }; + +layout(binding = BINDING_OUTPUT_IMAGE) writeonly uniform uimage2D output_image; + +layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; + +uvec4 ReadTexel(uint offset) { + switch (bytes_per_block) { +#if HAS_EXTENDED_TYPES + case 1: + return uvec4(u8data[offset], 0, 0, 0); + case 2: + return uvec4(u16data[offset / 2], 0, 0, 0); +#else + case 1: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); + case 2: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); +#endif + case 4: 
+ return uvec4(u32data[offset / 4], 0, 0, 0); + case 8: + return uvec4(u64data[offset / 8], 0, 0); + case 16: + return u128data[offset / 16]; + } + return uvec4(0); +} + +void main() { + uvec2 pos = gl_GlobalInvocationID.xy + origin; + + uint offset = 0; + offset += pos.x * bytes_per_block; + offset += pos.y * pitch; + + const uvec4 texel = ReadTexel(offset); + const ivec2 coord = ivec2(gl_GlobalInvocationID.xy) + destination; + imageStore(output_image, coord, texel); +} -- cgit v1.2.3 From 5169ce9fcd6240f5aaa639786e82f6589d917907 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:44:09 -0300 Subject: host_shaders: Add shader to render a full screen triangle --- src/video_core/host_shaders/CMakeLists.txt | 1 + .../host_shaders/full_screen_triangle.vert | 29 ++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 src/video_core/host_shaders/full_screen_triangle.vert (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 1983e7dc9..5770c4761 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,6 +1,7 @@ set(SHADER_FILES block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp + full_screen_triangle.vert opengl_present.frag opengl_present.vert pitch_unswizzle.comp diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert new file mode 100644 index 000000000..452ad6502 --- /dev/null +++ b/src/video_core/host_shaders/full_screen_triangle.vert @@ -0,0 +1,29 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 + +#ifdef VULKAN +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) vec2 tex_scale; +UNIFORM(1) vec2 tex_offset; +END_PUSH_CONSTANTS + +layout(location = 0) out vec2 texcoord; + +void main() { + float x = float((gl_VertexIndex & 1) << 2); + float y = float((gl_VertexIndex & 2) << 1); + gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); + texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset); +} -- cgit v1.2.3 From dc81a906402b0a39c3ff1964cdf935b46b062b5d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:47:08 -0300 Subject: host_shaders: Add compute shader to copy BC4 as RG32UI to RGBA8 --- src/video_core/host_shaders/CMakeLists.txt | 1 + src/video_core/host_shaders/opengl_copy_bc4.comp | 70 ++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 src/video_core/host_shaders/opengl_copy_bc4.comp (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 5770c4761..338bf9eec 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -2,6 +2,7 @@ set(SHADER_FILES block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp full_screen_triangle.vert + opengl_copy_bc4.comp opengl_present.frag opengl_present.vert pitch_unswizzle.comp diff --git a/src/video_core/host_shaders/opengl_copy_bc4.comp b/src/video_core/host_shaders/opengl_copy_bc4.comp new file mode 100644 index 000000000..7b8e20fbe --- /dev/null +++ b/src/video_core/host_shaders/opengl_copy_bc4.comp @@ -0,0 +1,70 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 430 core +#extension GL_ARB_gpu_shader_int64 : require + +layout (local_size_x = 4, local_size_y = 4) in; + +layout(binding = 0, rg32ui) readonly uniform uimage3D bc4_input; +layout(binding = 1, rgba8ui) writeonly uniform uimage3D bc4_output; + +layout(location = 0) uniform uvec3 src_offset; +layout(location = 1) uniform uvec3 dst_offset; + +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt +uint DecompressBlock(uint64_t bits, uvec2 coord) { + const uint code_offset = 16 + 3 * (4 * coord.y + coord.x); + const uint code = uint(bits >> code_offset) & 7; + const uint red0 = uint(bits >> 0) & 0xff; + const uint red1 = uint(bits >> 8) & 0xff; + if (red0 > red1) { + switch (code) { + case 0: + return red0; + case 1: + return red1; + case 2: + return (6 * red0 + 1 * red1) / 7; + case 3: + return (5 * red0 + 2 * red1) / 7; + case 4: + return (4 * red0 + 3 * red1) / 7; + case 5: + return (3 * red0 + 4 * red1) / 7; + case 6: + return (2 * red0 + 5 * red1) / 7; + case 7: + return (1 * red0 + 6 * red1) / 7; + } + } else { + switch (code) { + case 0: + return red0; + case 1: + return red1; + case 2: + return (4 * red0 + 1 * red1) / 5; + case 3: + return (3 * red0 + 2 * red1) / 5; + case 4: + return (2 * red0 + 3 * red1) / 5; + case 5: + return (1 * red0 + 4 * red1) / 5; + case 6: + return 0; + case 7: + return 0xff; + } + } + return 0; +} + +void main() { + uvec2 packed_bits = imageLoad(bc4_input, ivec3(gl_WorkGroupID + src_offset)).rg; + uint64_t bits = packUint2x32(packed_bits); + uint red = DecompressBlock(bits, gl_LocalInvocationID.xy); + uvec4 color = uvec4(red & 0xff, 0, 0, 0xff); + imageStore(bc4_output, ivec3(gl_GlobalInvocationID + dst_offset), color); +} -- cgit v1.2.3 From 82b7daed9cc119d1cb373e73ba3240a2e085f3eb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:48:44 -0300 Subject: host_shaders: Add shaders to convert between depth and color images --- src/video_core/host_shaders/CMakeLists.txt 
| 2 ++ src/video_core/host_shaders/convert_depth_to_float.frag | 13 +++++++++++++ src/video_core/host_shaders/convert_float_to_depth.frag | 13 +++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 src/video_core/host_shaders/convert_depth_to_float.frag create mode 100644 src/video_core/host_shaders/convert_float_to_depth.frag (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 338bf9eec..faf298f1c 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,6 +1,8 @@ set(SHADER_FILES block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp + convert_depth_to_float.frag + convert_float_to_depth.frag full_screen_triangle.vert opengl_copy_bc4.comp opengl_present.frag diff --git a/src/video_core/host_shaders/convert_depth_to_float.frag b/src/video_core/host_shaders/convert_depth_to_float.frag new file mode 100644 index 000000000..624c58509 --- /dev/null +++ b/src/video_core/host_shaders/convert_depth_to_float.frag @@ -0,0 +1,13 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D depth_texture; +layout(location = 0) out float output_color; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + output_color = texelFetch(depth_texture, coord, 0).r; +} diff --git a/src/video_core/host_shaders/convert_float_to_depth.frag b/src/video_core/host_shaders/convert_float_to_depth.frag new file mode 100644 index 000000000..d86c795f4 --- /dev/null +++ b/src/video_core/host_shaders/convert_float_to_depth.frag @@ -0,0 +1,13 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 + +layout(binding = 0) uniform sampler2D color_texture; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + float color = texelFetch(color_texture, coord, 0).r; + gl_FragDepth = color; +} -- cgit v1.2.3 From 64fbf319f1d4103531c0d8aecaa330346a9ecc81 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:59:12 -0300 Subject: host_shaders: Add shaders to present to the swapchain --- src/video_core/host_shaders/CMakeLists.txt | 2 ++ src/video_core/host_shaders/vulkan_present.frag | 15 +++++++++++++++ src/video_core/host_shaders/vulkan_present.vert | 19 +++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 src/video_core/host_shaders/vulkan_present.frag create mode 100644 src/video_core/host_shaders/vulkan_present.vert (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index faf298f1c..6084984f9 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -8,6 +8,8 @@ set(SHADER_FILES opengl_present.frag opengl_present.vert pitch_unswizzle.comp + vulkan_present.frag + vulkan_present.vert ) find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) diff --git a/src/video_core/host_shaders/vulkan_present.frag b/src/video_core/host_shaders/vulkan_present.frag new file mode 100644 index 000000000..0979ff3e6 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present.frag @@ -0,0 +1,15 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 460 core + +layout (location = 0) in vec2 frag_tex_coord; + +layout (location = 0) out vec4 color; + +layout (binding = 1) uniform sampler2D color_texture; + +void main() { + color = texture(color_texture, frag_tex_coord); +} diff --git a/src/video_core/host_shaders/vulkan_present.vert b/src/video_core/host_shaders/vulkan_present.vert new file mode 100644 index 000000000..00b868958 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present.vert @@ -0,0 +1,19 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core + +layout (location = 0) in vec2 vert_position; +layout (location = 1) in vec2 vert_tex_coord; + +layout (location = 0) out vec2 frag_tex_coord; + +layout (set = 0, binding = 0) uniform MatrixBlock { + mat4 modelview_matrix; +}; + +void main() { + gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0); + frag_tex_coord = vert_tex_coord; +} -- cgit v1.2.3 From ae5725b70901431646342eb2d51801478d86fdca Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 02:00:48 -0300 Subject: host_shaders: Add texture color blit fragment shader --- src/video_core/host_shaders/CMakeLists.txt | 1 + src/video_core/host_shaders/vulkan_blit_color_float.frag | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 src/video_core/host_shaders/vulkan_blit_color_float.frag (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 6084984f9..5b24f9866 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -8,6 +8,7 @@ set(SHADER_FILES opengl_present.frag opengl_present.vert pitch_unswizzle.comp + vulkan_blit_color_float.frag vulkan_present.frag vulkan_present.vert ) diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/vulkan_blit_color_float.frag new file 
mode 100644 index 000000000..4a6aae410 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_blit_color_float.frag @@ -0,0 +1,14 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D tex; + +layout(location = 0) in vec2 texcoord; +layout(location = 0) out vec4 color; + +void main() { + color = textureLod(tex, texcoord, 0); +} -- cgit v1.2.3 From 87ff58b1d713df6f4c292e56d1af20c0bfb9598f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 02:02:07 -0300 Subject: host_shaders: Add helper to blit depth stencil fragment shader --- src/video_core/host_shaders/CMakeLists.txt | 1 + .../host_shaders/vulkan_blit_depth_stencil.frag | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 src/video_core/host_shaders/vulkan_blit_depth_stencil.frag (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 5b24f9866..7059c2d2a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -9,6 +9,7 @@ set(SHADER_FILES opengl_present.vert pitch_unswizzle.comp vulkan_blit_color_float.frag + vulkan_blit_depth_stencil.frag vulkan_present.frag vulkan_present.vert ) diff --git a/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag new file mode 100644 index 000000000..19bb23a5a --- /dev/null +++ b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag @@ -0,0 +1,16 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 +#extension GL_ARB_shader_stencil_export : require + +layout(binding = 0) uniform sampler2D depth_tex; +layout(binding = 1) uniform isampler2D stencil_tex; + +layout(location = 0) in vec2 texcoord; + +void main() { + gl_FragDepth = textureLod(depth_tex, texcoord, 0).r; + gl_FragStencilRefARB = textureLod(stencil_tex, texcoord, 0).r; +} -- cgit v1.2.3 From 21b18057f7035e1442be20667662efba911653df Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 02:03:50 -0300 Subject: host_shaders: Add Vulkan assembler compute shaders --- src/video_core/host_shaders/CMakeLists.txt | 3 ++ src/video_core/host_shaders/vulkan_quad_array.comp | 28 +++++++++++++++ .../host_shaders/vulkan_quad_indexed.comp | 41 ++++++++++++++++++++++ src/video_core/host_shaders/vulkan_uint8.comp | 24 +++++++++++++ 4 files changed, 96 insertions(+) create mode 100644 src/video_core/host_shaders/vulkan_quad_array.comp create mode 100644 src/video_core/host_shaders/vulkan_quad_indexed.comp create mode 100644 src/video_core/host_shaders/vulkan_uint8.comp (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 7059c2d2a..4c7399d5a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -12,6 +12,9 @@ set(SHADER_FILES vulkan_blit_depth_stencil.frag vulkan_present.frag vulkan_present.vert + vulkan_quad_array.comp + vulkan_quad_indexed.comp + vulkan_uint8.comp ) find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) diff --git a/src/video_core/host_shaders/vulkan_quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp new file mode 100644 index 000000000..212f4e998 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_quad_array.comp @@ -0,0 +1,28 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 460 core + +layout (local_size_x = 1024) in; + +layout (std430, set = 0, binding = 0) buffer OutputBuffer { + uint output_indexes[]; +}; + +layout (push_constant) uniform PushConstants { + uint first; +}; + +void main() { + uint primitive = gl_GlobalInvocationID.x; + if (primitive * 6 >= output_indexes.length()) { + return; + } + + const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3); + for (uint vertex = 0; vertex < 6; ++vertex) { + uint index = first + primitive * 4 + quad_map[vertex]; + output_indexes[primitive * 6 + vertex] = index; + } +} diff --git a/src/video_core/host_shaders/vulkan_quad_indexed.comp b/src/video_core/host_shaders/vulkan_quad_indexed.comp new file mode 100644 index 000000000..8655591d0 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_quad_indexed.comp @@ -0,0 +1,41 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core + +layout (local_size_x = 1024) in; + +layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { + uint input_indexes[]; +}; + +layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { + uint output_indexes[]; +}; + +layout (push_constant) uniform PushConstants { + uint base_vertex; + int index_shift; // 0: uint8, 1: uint16, 2: uint32 +}; + +void main() { + int primitive = int(gl_GlobalInvocationID.x); + if (primitive * 6 >= output_indexes.length()) { + return; + } + + int index_size = 8 << index_shift; + int flipped_shift = 2 - index_shift; + int mask = (1 << flipped_shift) - 1; + + const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3); + for (uint vertex = 0; vertex < 6; ++vertex) { + int offset = primitive * 4 + quad_swizzle[vertex]; + int int_offset = offset >> flipped_shift; + int bit_offset = (offset & mask) * index_size; + uint packed_input = input_indexes[int_offset]; + uint index = bitfieldExtract(packed_input, bit_offset, index_size); + output_indexes[primitive * 6 + vertex] = index 
+ base_vertex; + } +} diff --git a/src/video_core/host_shaders/vulkan_uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp new file mode 100644 index 000000000..ad74d7af9 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_uint8.comp @@ -0,0 +1,24 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require + +layout (local_size_x = 1024) in; + +layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { + uint8_t input_indexes[]; +}; + +layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { + uint16_t output_indexes[]; +}; + +void main() { + uint id = gl_GlobalInvocationID.x; + if (id < input_indexes.length()) { + output_indexes[id] = uint16_t(input_indexes[id]); + } +} -- cgit v1.2.3 From 9106ac1e6b912d7098845c346e5465b780bd70dd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 02:06:22 -0300 Subject: video_core: Add a delayed destruction ring abstraction --- src/video_core/CMakeLists.txt | 1 + src/video_core/delayed_destruction_ring.h | 32 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 src/video_core/delayed_destruction_ring.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 5b73724ce..acf96f789 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -25,6 +25,7 @@ add_library(video_core STATIC command_classes/vic.h compatible_formats.cpp compatible_formats.h + delayed_destruction_ring.h dirty_flags.cpp dirty_flags.h dma_pusher.cpp diff --git a/src/video_core/delayed_destruction_ring.h b/src/video_core/delayed_destruction_ring.h new file mode 100644 index 000000000..4f1d29c04 --- /dev/null +++ b/src/video_core/delayed_destruction_ring.h @@ -0,0 +1,32 @@ +// Copyright 2020 yuzu Emulator Project +// 
Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <cstddef> +#include <utility> +#include <vector> + +namespace VideoCommon { + +/// Container to push objects to be destroyed a few ticks in the future +template <typename T, size_t TICKS_TO_DESTROY> +class DelayedDestructionRing { +public: + void Tick() { + index = (index + 1) % TICKS_TO_DESTROY; + elements[index].clear(); + } + + void Push(T&& object) { + elements[index].push_back(std::move(object)); + } + +private: + size_t index = 0; + std::array<std::vector<T>, TICKS_TO_DESTROY> elements; +}; + +} // namespace VideoCommon -- cgit v1.2.3 From 9764c13d6d2977903f407761b27d847c0056e1c4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 02:25:23 -0300 Subject: video_core: Rewrite the texture cache The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage. This commit aims to address those issues. 
--- src/video_core/CMakeLists.txt | 47 +- src/video_core/buffer_cache/buffer_cache.h | 19 +- src/video_core/command_classes/vic.cpp | 8 +- src/video_core/compatible_formats.cpp | 142 +- src/video_core/compatible_formats.h | 23 +- src/video_core/dirty_flags.cpp | 7 + src/video_core/dirty_flags.h | 3 + src/video_core/engines/fermi_2d.cpp | 89 +- src/video_core/engines/fermi_2d.h | 331 ++- src/video_core/engines/kepler_compute.cpp | 26 +- src/video_core/engines/kepler_compute.h | 5 - src/video_core/engines/maxwell_3d.cpp | 45 +- src/video_core/engines/maxwell_3d.h | 127 +- src/video_core/engines/maxwell_dma.cpp | 3 + src/video_core/fence_manager.h | 17 +- src/video_core/memory_manager.cpp | 5 +- src/video_core/morton.cpp | 250 -- src/video_core/morton.h | 18 - src/video_core/rasterizer_interface.h | 12 +- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 7 +- src/video_core/renderer_opengl/gl_buffer_cache.h | 8 +- src/video_core/renderer_opengl/gl_device.cpp | 64 +- src/video_core/renderer_opengl/gl_device.h | 13 +- .../renderer_opengl/gl_fence_manager.cpp | 2 +- src/video_core/renderer_opengl/gl_fence_manager.h | 4 +- .../renderer_opengl/gl_framebuffer_cache.cpp | 85 - .../renderer_opengl/gl_framebuffer_cache.h | 68 - src/video_core/renderer_opengl/gl_rasterizer.cpp | 504 ++-- src/video_core/renderer_opengl/gl_rasterizer.h | 63 +- .../renderer_opengl/gl_resource_manager.cpp | 2 +- .../renderer_opengl/gl_sampler_cache.cpp | 52 - src/video_core/renderer_opengl/gl_sampler_cache.h | 25 - src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 - .../renderer_opengl/gl_shader_decompiler.cpp | 8 +- .../renderer_opengl/gl_shader_decompiler.h | 4 +- .../renderer_opengl/gl_shader_manager.cpp | 15 + src/video_core/renderer_opengl/gl_shader_manager.h | 6 + .../renderer_opengl/gl_state_tracker.cpp | 7 + src/video_core/renderer_opengl/gl_state_tracker.h | 15 +- .../renderer_opengl/gl_stream_buffer.cpp | 32 +- src/video_core/renderer_opengl/gl_stream_buffer.h | 19 +- 
.../renderer_opengl/gl_texture_cache.cpp | 1454 +++++++----- src/video_core/renderer_opengl/gl_texture_cache.h | 286 ++- src/video_core/renderer_opengl/maxwell_to_gl.h | 13 + src/video_core/renderer_opengl/renderer_opengl.cpp | 49 +- src/video_core/renderer_opengl/renderer_opengl.h | 1 + src/video_core/renderer_opengl/util_shaders.cpp | 224 ++ src/video_core/renderer_opengl/util_shaders.h | 51 + src/video_core/renderer_opengl/utils.cpp | 42 - src/video_core/renderer_opengl/utils.h | 16 - src/video_core/renderer_vulkan/blit_image.cpp | 624 +++++ src/video_core/renderer_vulkan/blit_image.h | 97 + .../renderer_vulkan/fixed_pipeline_state.cpp | 1 + .../renderer_vulkan/fixed_pipeline_state.h | 1 + src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 30 +- src/video_core/renderer_vulkan/maxwell_to_vk.h | 2 + src/video_core/renderer_vulkan/renderer_vulkan.cpp | 13 +- src/video_core/renderer_vulkan/renderer_vulkan.h | 3 +- src/video_core/renderer_vulkan/shaders/blit.frag | 24 - src/video_core/renderer_vulkan/shaders/blit.vert | 28 - .../renderer_vulkan/shaders/quad_array.comp | 37 - .../renderer_vulkan/shaders/quad_indexed.comp | 50 - src/video_core/renderer_vulkan/shaders/uint8.comp | 33 - src/video_core/renderer_vulkan/vk_blit_screen.cpp | 301 +-- src/video_core/renderer_vulkan/vk_blit_screen.h | 4 +- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 67 +- src/video_core/renderer_vulkan/vk_buffer_cache.h | 10 +- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 327 +-- src/video_core/renderer_vulkan/vk_compute_pass.h | 4 +- src/video_core/renderer_vulkan/vk_device.cpp | 110 +- src/video_core/renderer_vulkan/vk_device.h | 34 + .../renderer_vulkan/vk_fence_manager.cpp | 7 +- src/video_core/renderer_vulkan/vk_fence_manager.h | 6 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 64 +- .../renderer_vulkan/vk_graphics_pipeline.h | 26 +- src/video_core/renderer_vulkan/vk_image.cpp | 135 -- src/video_core/renderer_vulkan/vk_image.h | 84 - 
.../renderer_vulkan/vk_memory_manager.cpp | 2 +- src/video_core/renderer_vulkan/vk_memory_manager.h | 20 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 28 +- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 16 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 700 +++--- src/video_core/renderer_vulkan/vk_rasterizer.h | 131 +- .../renderer_vulkan/vk_renderpass_cache.cpp | 158 -- .../renderer_vulkan/vk_renderpass_cache.h | 70 - .../renderer_vulkan/vk_sampler_cache.cpp | 83 - src/video_core/renderer_vulkan/vk_sampler_cache.h | 29 - src/video_core/renderer_vulkan/vk_scheduler.cpp | 79 +- src/video_core/renderer_vulkan/vk_scheduler.h | 14 +- .../renderer_vulkan/vk_shader_decompiler.cpp | 6 +- .../renderer_vulkan/vk_shader_decompiler.h | 8 +- src/video_core/renderer_vulkan/vk_shader_util.cpp | 11 +- src/video_core/renderer_vulkan/vk_shader_util.h | 4 +- .../renderer_vulkan/vk_state_tracker.cpp | 23 +- src/video_core/renderer_vulkan/vk_state_tracker.h | 8 + .../renderer_vulkan/vk_stream_buffer.cpp | 20 +- src/video_core/renderer_vulkan/vk_stream_buffer.h | 12 +- .../renderer_vulkan/vk_texture_cache.cpp | 1473 ++++++++---- src/video_core/renderer_vulkan/vk_texture_cache.h | 328 +-- .../renderer_vulkan/vk_update_descriptor.h | 30 +- src/video_core/renderer_vulkan/wrapper.cpp | 69 +- src/video_core/renderer_vulkan/wrapper.h | 117 +- src/video_core/sampler_cache.cpp | 21 - src/video_core/sampler_cache.h | 60 - src/video_core/shader/async_shaders.cpp | 9 +- src/video_core/shader/async_shaders.h | 6 +- src/video_core/shader/decode.cpp | 6 +- src/video_core/shader/decode/image.cpp | 11 +- src/video_core/shader/decode/texture.cpp | 56 +- src/video_core/shader/node.h | 33 +- src/video_core/shader/shader_ir.h | 18 +- src/video_core/surface.cpp | 2 +- src/video_core/surface.h | 152 +- .../texture_cache/accelerated_swizzle.cpp | 70 + src/video_core/texture_cache/accelerated_swizzle.h | 45 + src/video_core/texture_cache/copy_params.h | 36 - 
src/video_core/texture_cache/decode_bc4.cpp | 97 + src/video_core/texture_cache/decode_bc4.h | 16 + src/video_core/texture_cache/descriptor_table.h | 82 + .../texture_cache/format_lookup_table.cpp | 380 ++-- src/video_core/texture_cache/format_lookup_table.h | 42 +- src/video_core/texture_cache/formatter.cpp | 95 + src/video_core/texture_cache/formatter.h | 263 +++ src/video_core/texture_cache/image_base.cpp | 216 ++ src/video_core/texture_cache/image_base.h | 83 + src/video_core/texture_cache/image_info.cpp | 189 ++ src/video_core/texture_cache/image_info.h | 38 + src/video_core/texture_cache/image_view_base.cpp | 41 + src/video_core/texture_cache/image_view_base.h | 47 + src/video_core/texture_cache/image_view_info.cpp | 88 + src/video_core/texture_cache/image_view_info.h | 50 + src/video_core/texture_cache/render_targets.h | 51 + src/video_core/texture_cache/samples_helper.h | 55 + src/video_core/texture_cache/slot_vector.h | 156 ++ src/video_core/texture_cache/surface_base.cpp | 299 --- src/video_core/texture_cache/surface_base.h | 333 --- src/video_core/texture_cache/surface_params.cpp | 445 ---- src/video_core/texture_cache/surface_params.h | 294 --- src/video_core/texture_cache/surface_view.cpp | 27 - src/video_core/texture_cache/surface_view.h | 68 - src/video_core/texture_cache/texture_cache.h | 2397 +++++++++++--------- src/video_core/texture_cache/types.h | 140 ++ src/video_core/texture_cache/util.cpp | 1232 ++++++++++ src/video_core/texture_cache/util.h | 107 + src/video_core/textures/astc.cpp | 58 +- src/video_core/textures/astc.h | 5 +- src/video_core/textures/convert.cpp | 93 - src/video_core/textures/convert.h | 22 - src/video_core/textures/decoders.cpp | 249 +- src/video_core/textures/decoders.h | 44 +- src/video_core/textures/texture.cpp | 16 +- src/video_core/textures/texture.h | 239 +- 152 files changed, 10359 insertions(+), 8101 deletions(-) delete mode 100644 src/video_core/renderer_opengl/gl_framebuffer_cache.cpp delete mode 100644 
src/video_core/renderer_opengl/gl_framebuffer_cache.h delete mode 100644 src/video_core/renderer_opengl/gl_sampler_cache.cpp delete mode 100644 src/video_core/renderer_opengl/gl_sampler_cache.h create mode 100644 src/video_core/renderer_opengl/util_shaders.cpp create mode 100644 src/video_core/renderer_opengl/util_shaders.h delete mode 100644 src/video_core/renderer_opengl/utils.cpp delete mode 100644 src/video_core/renderer_opengl/utils.h create mode 100644 src/video_core/renderer_vulkan/blit_image.cpp create mode 100644 src/video_core/renderer_vulkan/blit_image.h delete mode 100644 src/video_core/renderer_vulkan/shaders/blit.frag delete mode 100644 src/video_core/renderer_vulkan/shaders/blit.vert delete mode 100644 src/video_core/renderer_vulkan/shaders/quad_array.comp delete mode 100644 src/video_core/renderer_vulkan/shaders/quad_indexed.comp delete mode 100644 src/video_core/renderer_vulkan/shaders/uint8.comp delete mode 100644 src/video_core/renderer_vulkan/vk_image.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_image.h delete mode 100644 src/video_core/renderer_vulkan/vk_renderpass_cache.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_renderpass_cache.h delete mode 100644 src/video_core/renderer_vulkan/vk_sampler_cache.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_sampler_cache.h delete mode 100644 src/video_core/sampler_cache.cpp delete mode 100644 src/video_core/sampler_cache.h create mode 100644 src/video_core/texture_cache/accelerated_swizzle.cpp create mode 100644 src/video_core/texture_cache/accelerated_swizzle.h delete mode 100644 src/video_core/texture_cache/copy_params.h create mode 100644 src/video_core/texture_cache/decode_bc4.cpp create mode 100644 src/video_core/texture_cache/decode_bc4.h create mode 100644 src/video_core/texture_cache/descriptor_table.h create mode 100644 src/video_core/texture_cache/formatter.cpp create mode 100644 src/video_core/texture_cache/formatter.h create mode 100644 
src/video_core/texture_cache/image_base.cpp create mode 100644 src/video_core/texture_cache/image_base.h create mode 100644 src/video_core/texture_cache/image_info.cpp create mode 100644 src/video_core/texture_cache/image_info.h create mode 100644 src/video_core/texture_cache/image_view_base.cpp create mode 100644 src/video_core/texture_cache/image_view_base.h create mode 100644 src/video_core/texture_cache/image_view_info.cpp create mode 100644 src/video_core/texture_cache/image_view_info.h create mode 100644 src/video_core/texture_cache/render_targets.h create mode 100644 src/video_core/texture_cache/samples_helper.h create mode 100644 src/video_core/texture_cache/slot_vector.h delete mode 100644 src/video_core/texture_cache/surface_base.cpp delete mode 100644 src/video_core/texture_cache/surface_base.h delete mode 100644 src/video_core/texture_cache/surface_params.cpp delete mode 100644 src/video_core/texture_cache/surface_params.h delete mode 100644 src/video_core/texture_cache/surface_view.cpp delete mode 100644 src/video_core/texture_cache/surface_view.h create mode 100644 src/video_core/texture_cache/types.h create mode 100644 src/video_core/texture_cache/util.cpp create mode 100644 src/video_core/texture_cache/util.h delete mode 100644 src/video_core/textures/convert.cpp delete mode 100644 src/video_core/textures/convert.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index acf96f789..948e167c3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -85,14 +85,10 @@ add_library(video_core STATIC renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h - renderer_opengl/gl_framebuffer_cache.cpp - renderer_opengl/gl_framebuffer_cache.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp renderer_opengl/gl_resource_manager.h - renderer_opengl/gl_sampler_cache.cpp - 
renderer_opengl/gl_sampler_cache.h renderer_opengl/gl_shader_cache.cpp renderer_opengl/gl_shader_cache.h renderer_opengl/gl_shader_decompiler.cpp @@ -114,8 +110,10 @@ add_library(video_core STATIC renderer_opengl/maxwell_to_gl.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h - renderer_opengl/utils.cpp - renderer_opengl/utils.h + renderer_opengl/util_shaders.cpp + renderer_opengl/util_shaders.h + renderer_vulkan/blit_image.cpp + renderer_vulkan/blit_image.h renderer_vulkan/fixed_pipeline_state.cpp renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp @@ -142,8 +140,6 @@ add_library(video_core STATIC renderer_vulkan/vk_fence_manager.h renderer_vulkan/vk_graphics_pipeline.cpp renderer_vulkan/vk_graphics_pipeline.h - renderer_vulkan/vk_image.cpp - renderer_vulkan/vk_image.h renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_memory_manager.cpp @@ -154,12 +150,8 @@ add_library(video_core STATIC renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h - renderer_vulkan/vk_renderpass_cache.cpp - renderer_vulkan/vk_renderpass_cache.h renderer_vulkan/vk_resource_pool.cpp renderer_vulkan/vk_resource_pool.h - renderer_vulkan/vk_sampler_cache.cpp - renderer_vulkan/vk_sampler_cache.h renderer_vulkan/vk_scheduler.cpp renderer_vulkan/vk_scheduler.h renderer_vulkan/vk_shader_decompiler.cpp @@ -180,8 +172,6 @@ add_library(video_core STATIC renderer_vulkan/vk_update_descriptor.h renderer_vulkan/wrapper.cpp renderer_vulkan/wrapper.h - sampler_cache.cpp - sampler_cache.h shader_cache.h shader_notify.cpp shader_notify.h @@ -238,19 +228,32 @@ add_library(video_core STATIC shader/transform_feedback.h surface.cpp surface.h + texture_cache/accelerated_swizzle.cpp + texture_cache/accelerated_swizzle.h + texture_cache/decode_bc4.cpp + texture_cache/decode_bc4.h + texture_cache/descriptor_table.h + texture_cache/formatter.cpp + texture_cache/formatter.h 
texture_cache/format_lookup_table.cpp texture_cache/format_lookup_table.h - texture_cache/surface_base.cpp - texture_cache/surface_base.h - texture_cache/surface_params.cpp - texture_cache/surface_params.h - texture_cache/surface_view.cpp - texture_cache/surface_view.h + texture_cache/image_base.cpp + texture_cache/image_base.h + texture_cache/image_info.cpp + texture_cache/image_info.h + texture_cache/image_view_base.cpp + texture_cache/image_view_base.h + texture_cache/image_view_info.cpp + texture_cache/image_view_info.h + texture_cache/render_targets.h + texture_cache/samples_helper.h + texture_cache/slot_vector.h texture_cache/texture_cache.h + texture_cache/types.h + texture_cache/util.cpp + texture_cache/util.h textures/astc.cpp textures/astc.h - textures/convert.cpp - textures/convert.h textures/decoders.cpp textures/decoders.h textures/texture.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 38961f3fd..83b9ee871 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -118,20 +118,17 @@ public: /// Prepares the buffer cache for data uploading /// @param max_size Maximum number of bytes that will be uploaded /// @return True when a stream buffer invalidation was required, false otherwise - bool Map(std::size_t max_size) { + void Map(std::size_t max_size) { std::lock_guard lock{mutex}; - bool invalidated; - std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); + std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4); buffer_offset = buffer_offset_base; - - return invalidated; } /// Finishes the upload stream void Unmap() { std::lock_guard lock{mutex}; - stream_buffer->Unmap(buffer_offset - buffer_offset_base); + stream_buffer.Unmap(buffer_offset - buffer_offset_base); } /// Function called at the end of each frame, inteded for deferred operations @@ -261,9 +258,9 @@ public: protected: explicit 
BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - std::unique_ptr stream_buffer_) + StreamBuffer& stream_buffer_) : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, - stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {} + stream_buffer{stream_buffer_} {} ~BufferCache() = default; @@ -441,7 +438,7 @@ private: buffer_ptr += size; buffer_offset += size; - return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()}; + return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()}; } void AlignBuffer(std::size_t alignment) { @@ -567,9 +564,7 @@ private: VideoCore::RasterizerInterface& rasterizer; Tegra::MemoryManager& gpu_memory; Core::Memory::Memory& cpu_memory; - - std::unique_ptr stream_buffer; - BufferType stream_buffer_handle; + StreamBuffer& stream_buffer; u8* buffer_ptr = nullptr; u64 buffer_offset = 0; diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 66e21ce9c..aa8c9f9de 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -9,7 +9,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" -#include "video_core/texture_cache/surface_params.h" +#include "video_core/textures/decoders.h" extern "C" { #include @@ -105,9 +105,9 @@ void Vic::Execute() { const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, block_height, 0); std::vector swizzled_data(size); - Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4, - swizzled_data.data(), converted_frame_buffer.get(), - false, block_height, 0, 1); + Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4, + frame->width, 4, swizzled_data.data(), + converted_frame_buffer.get(), block_height, 0, 0); 
gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); gpu.Maxwell3D().OnMemoryWrite(); diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp index b06c32c84..1619d8664 100644 --- a/src/video_core/compatible_formats.cpp +++ b/src/video_core/compatible_formats.cpp @@ -3,9 +3,9 @@ // Refer to the license.txt file included. #include -#include #include +#include "common/common_types.h" #include "video_core/compatible_formats.h" #include "video_core/surface.h" @@ -13,23 +13,25 @@ namespace VideoCore::Surface { namespace { +using Table = std::array, MaxPixelFormat>; + // Compatibility table taken from Table 3.X.2 in: // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt -constexpr std::array VIEW_CLASS_128_BITS = { +constexpr std::array VIEW_CLASS_128_BITS{ PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_SINT, }; -constexpr std::array VIEW_CLASS_96_BITS = { +constexpr std::array VIEW_CLASS_96_BITS{ PixelFormat::R32G32B32_FLOAT, }; // Missing formats: // PixelFormat::RGB32UI, // PixelFormat::RGB32I, -constexpr std::array VIEW_CLASS_64_BITS = { +constexpr std::array VIEW_CLASS_64_BITS{ PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT, PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, @@ -38,7 +40,7 @@ constexpr std::array VIEW_CLASS_64_BITS = { // TODO: How should we handle 48 bits? -constexpr std::array VIEW_CLASS_32_BITS = { +constexpr std::array VIEW_CLASS_32_BITS{ PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT, PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT, PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM, @@ -50,43 +52,105 @@ constexpr std::array VIEW_CLASS_32_BITS = { // TODO: How should we handle 24 bits? 
-constexpr std::array VIEW_CLASS_16_BITS = { +constexpr std::array VIEW_CLASS_16_BITS{ PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT, PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM, PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT, }; -constexpr std::array VIEW_CLASS_8_BITS = { +constexpr std::array VIEW_CLASS_8_BITS{ PixelFormat::R8_UINT, PixelFormat::R8_UNORM, PixelFormat::R8_SINT, PixelFormat::R8_SNORM, }; -constexpr std::array VIEW_CLASS_RGTC1_RED = { +constexpr std::array VIEW_CLASS_RGTC1_RED{ PixelFormat::BC4_UNORM, PixelFormat::BC4_SNORM, }; -constexpr std::array VIEW_CLASS_RGTC2_RG = { +constexpr std::array VIEW_CLASS_RGTC2_RG{ PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM, }; -constexpr std::array VIEW_CLASS_BPTC_UNORM = { +constexpr std::array VIEW_CLASS_BPTC_UNORM{ PixelFormat::BC7_UNORM, PixelFormat::BC7_SRGB, }; -constexpr std::array VIEW_CLASS_BPTC_FLOAT = { +constexpr std::array VIEW_CLASS_BPTC_FLOAT{ PixelFormat::BC6H_SFLOAT, PixelFormat::BC6H_UFLOAT, }; +constexpr std::array VIEW_CLASS_ASTC_4x4_RGBA{ + PixelFormat::ASTC_2D_4X4_UNORM, + PixelFormat::ASTC_2D_4X4_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_5x4_RGBA{ + PixelFormat::ASTC_2D_5X4_UNORM, + PixelFormat::ASTC_2D_5X4_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_5x5_RGBA{ + PixelFormat::ASTC_2D_5X5_UNORM, + PixelFormat::ASTC_2D_5X5_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_6x5_RGBA{ + PixelFormat::ASTC_2D_6X5_UNORM, + PixelFormat::ASTC_2D_6X5_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_6x6_RGBA{ + PixelFormat::ASTC_2D_6X6_UNORM, + PixelFormat::ASTC_2D_6X6_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_8x5_RGBA{ + PixelFormat::ASTC_2D_8X5_UNORM, + PixelFormat::ASTC_2D_8X5_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{ + PixelFormat::ASTC_2D_8X8_UNORM, + PixelFormat::ASTC_2D_8X8_SRGB, +}; + +// Missing formats: +// PixelFormat::ASTC_2D_10X5_UNORM +// PixelFormat::ASTC_2D_10X5_SRGB + 
+// Missing formats: +// PixelFormat::ASTC_2D_10X6_UNORM +// PixelFormat::ASTC_2D_10X6_SRGB + +constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{ + PixelFormat::ASTC_2D_10X8_UNORM, + PixelFormat::ASTC_2D_10X8_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{ + PixelFormat::ASTC_2D_10X10_UNORM, + PixelFormat::ASTC_2D_10X10_SRGB, +}; + +// Missing formats +// ASTC_2D_12X10_UNORM, +// ASTC_2D_12X10_SRGB, + +constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{ + PixelFormat::ASTC_2D_12X12_UNORM, + PixelFormat::ASTC_2D_12X12_SRGB, +}; + // Compatibility table taken from Table 4.X.1 in: // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt -constexpr std::array COPY_CLASS_128_BITS = { +constexpr std::array COPY_CLASS_128_BITS{ PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT, PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM, PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM, @@ -97,7 +161,7 @@ constexpr std::array COPY_CLASS_128_BITS = { // PixelFormat::RGBA32I // COMPRESSED_RG_RGTC2 -constexpr std::array COPY_CLASS_64_BITS = { +constexpr std::array COPY_CLASS_64_BITS{ PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT, PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT, @@ -110,32 +174,36 @@ constexpr std::array COPY_CLASS_64_BITS = { // COMPRESSED_RGBA_S3TC_DXT1_EXT // COMPRESSED_SIGNED_RED_RGTC1 -void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) { - compatiblity[format_a][format_b] = true; - compatiblity[format_b][format_a] = true; +constexpr void Enable(Table& table, size_t format_a, size_t format_b) { + table[format_a][format_b / 64] |= u64(1) << (format_b % 64); + table[format_b][format_a / 64] |= u64(1) << (format_a % 64); } -void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) { - 
Enable(compatibility, static_cast(format_a), static_cast(format_b)); +constexpr void Enable(Table& table, PixelFormat format_a, PixelFormat format_b) { + Enable(table, static_cast(format_a), static_cast(format_b)); } template -void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) { +constexpr void EnableRange(Table& table, const Range& range) { for (auto it_a = range.begin(); it_a != range.end(); ++it_a) { for (auto it_b = it_a; it_b != range.end(); ++it_b) { - Enable(compatibility, *it_a, *it_b); + Enable(table, *it_a, *it_b); } } } -} // Anonymous namespace +constexpr bool IsSupported(const Table& table, PixelFormat format_a, PixelFormat format_b) { + const size_t a = static_cast(format_a); + const size_t b = static_cast(format_b); + return ((table[a][b / 64] >> (b % 64)) & 1) != 0; +} -FormatCompatibility::FormatCompatibility() { +constexpr Table MakeViewTable() { + Table view{}; for (size_t i = 0; i < MaxPixelFormat; ++i) { // Identity is allowed Enable(view, i, i); } - EnableRange(view, VIEW_CLASS_128_BITS); EnableRange(view, VIEW_CLASS_96_BITS); EnableRange(view, VIEW_CLASS_64_BITS); @@ -146,10 +214,36 @@ FormatCompatibility::FormatCompatibility() { EnableRange(view, VIEW_CLASS_RGTC2_RG); EnableRange(view, VIEW_CLASS_BPTC_UNORM); EnableRange(view, VIEW_CLASS_BPTC_FLOAT); + EnableRange(view, VIEW_CLASS_ASTC_4x4_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_5x4_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_5x5_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_6x5_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA); + return view; +} - copy = view; +constexpr Table MakeCopyTable() { + Table copy = MakeViewTable(); EnableRange(copy, COPY_CLASS_128_BITS); EnableRange(copy, COPY_CLASS_64_BITS); + return copy; +} + 
+} // Anonymous namespace + +bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b) { + static constexpr Table TABLE = MakeViewTable(); + return IsSupported(TABLE, format_a, format_b); +} + +bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) { + static constexpr Table TABLE = MakeCopyTable(); + return IsSupported(TABLE, format_a, format_b); } } // namespace VideoCore::Surface diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h index 51766349b..b5eb03bea 100644 --- a/src/video_core/compatible_formats.h +++ b/src/video_core/compatible_formats.h @@ -4,31 +4,12 @@ #pragma once -#include -#include -#include - #include "video_core/surface.h" namespace VideoCore::Surface { -class FormatCompatibility { -public: - using Table = std::array, MaxPixelFormat>; - - explicit FormatCompatibility(); - - bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept { - return view[static_cast(format_a)][static_cast(format_b)]; - } - - bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept { - return copy[static_cast(format_a)][static_cast(format_b)]; - } +bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b); -private: - Table view; - Table copy; -}; +bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b); } // namespace VideoCore::Surface diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index 2faa6ef0e..b1eaac00c 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp @@ -16,6 +16,9 @@ namespace VideoCommon::Dirty { using Tegra::Engines::Maxwell3D; void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { + FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors); + FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors); + static constexpr std::size_t num_per_rt = NUM(rt[0]); static constexpr std::size_t begin = OFF(rt); static constexpr std::size_t num = num_per_rt * 
Maxwell3D::Regs::NumRenderTargets; @@ -23,6 +26,10 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt); } FillBlock(tables[1], begin, num, RenderTargets); + FillBlock(tables[0], OFF(render_area), NUM(render_area), RenderTargets); + + tables[0][OFF(rt_control)] = RenderTargets; + tables[1][OFF(rt_control)] = RenderTargetControl; static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets}; for (std::size_t i = 0; i < std::size(zeta_flags); ++i) { diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 3f6c1d83a..875527ddd 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -16,7 +16,10 @@ namespace VideoCommon::Dirty { enum : u8 { NullEntry = 0, + Descriptors, + RenderTargets, + RenderTargetControl, ColorBuffer0, ColorBuffer1, ColorBuffer2, diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 4293d676c..a01d334ad 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -10,7 +10,11 @@ namespace Tegra::Engines { -Fermi2D::Fermi2D() = default; +Fermi2D::Fermi2D() { + // Nvidia's OpenGL driver seems to assume these values + regs.src.depth = 1; + regs.dst.depth = 1; +} Fermi2D::~Fermi2D() = default; @@ -21,78 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Fermi2D register, increase the size of the Regs structure"); - regs.reg_array[method] = method_argument; - switch (method) { - // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit, - // so trigger on the second 32-bit write. 
- case FERMI2D_REG_INDEX(blit_src_y) + 1: { - HandleSurfaceCopy(); - break; - } + if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) { + Blit(); } } void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { - for (std::size_t i = 0; i < amount; i++) { - CallMethod(method, base_start[i], methods_pending - static_cast(i) <= 1); + for (u32 i = 0; i < amount; ++i) { + CallMethod(method, base_start[i], methods_pending - i <= 1); } } -static std::pair DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) { - const u32 line_a = src_2 - src_1; - const u32 line_b = dst_2 - dst_1; - const u32 excess = std::max(0, line_a - src_line + src_1); - return {line_b - (excess * line_b) / line_a, excess}; -} - -void Fermi2D::HandleSurfaceCopy() { - LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation); +void Fermi2D::Blit() { + LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", + regs.src.Address(), regs.dst.Address()); - // TODO(Subv): Only raw copies are implemented. 
- ASSERT(regs.operation == Operation::SrcCopy); + UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy"); + UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero"); + UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero"); + UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one"); + UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); - const u32 src_blit_x1{static_cast(regs.blit_src_x >> 32)}; - const u32 src_blit_y1{static_cast(regs.blit_src_y >> 32)}; - u32 src_blit_x2, src_blit_y2; - if (regs.blit_control.origin == Origin::Corner) { - src_blit_x2 = - static_cast((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32); - src_blit_y2 = - static_cast((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32); - } else { - src_blit_x2 = static_cast((regs.blit_src_x >> 32) + regs.blit_dst_width); - src_blit_y2 = static_cast((regs.blit_src_y >> 32) + regs.blit_dst_height); - } - u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width; - u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height; - const auto [new_dst_w, src_excess_x] = - DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width); - const auto [new_dst_h, src_excess_y] = - DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height); - dst_blit_x2 = new_dst_w + regs.blit_dst_x; - src_blit_x2 = src_blit_x2 - src_excess_x; - dst_blit_y2 = new_dst_h + regs.blit_dst_y; - src_blit_y2 = src_blit_y2 - src_excess_y; - const auto [new_src_w, dst_excess_x] = - DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width); - const auto [new_src_h, dst_excess_y] = - DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height); - src_blit_x2 = new_src_w + src_blit_x1; - dst_blit_x2 = dst_blit_x2 - dst_excess_x; - src_blit_y2 = new_src_h + src_blit_y1; - dst_blit_y2 = dst_blit_y2 - dst_excess_y; - const 
Common::Rectangle src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; - const Common::Rectangle dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2, - dst_blit_y2}; - const Config copy_config{ + const auto& args = regs.pixels_from_memory; + const Config config{ .operation = regs.operation, - .filter = regs.blit_control.filter, - .src_rect = src_rect, - .dst_rect = dst_rect, + .filter = args.sample_mode.filter, + .dst_x0 = args.dst_x0, + .dst_y0 = args.dst_y0, + .dst_x1 = args.dst_x0 + args.dst_width, + .dst_y1 = args.dst_y0 + args.dst_height, + .src_x0 = static_cast(args.src_x0 >> 32), + .src_y0 = static_cast(args.src_y0 >> 32), + .src_x1 = static_cast((args.du_dx * args.dst_width + args.src_x0) >> 32), + .src_y1 = static_cast((args.dv_dy * args.dst_height + args.src_y0) >> 32), }; - if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { + if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) { UNIMPLEMENTED(); } } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 0909709ec..81522988e 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -53,8 +53,8 @@ public: }; enum class Filter : u32 { - PointSample = 0, // Nearest - Linear = 1, + Point = 0, + Bilinear = 1, }; enum class Operation : u32 { @@ -67,88 +67,235 @@ public: BlendPremult = 6, }; - struct Regs { - static constexpr std::size_t NUM_REGS = 0x258; + enum class MemoryLayout : u32 { + BlockLinear = 0, + Pitch = 1, + }; - struct Surface { - RenderTargetFormat format; - BitField<0, 1, u32> linear; - union { - BitField<0, 4, u32> block_width; - BitField<4, 4, u32> block_height; - BitField<8, 4, u32> block_depth; - }; - u32 depth; - u32 layer; - u32 pitch; - u32 width; - u32 height; - u32 address_high; - u32 address_low; - - GPUVAddr Address() const { - return static_cast((static_cast(address_high) << 32) | - address_low); - } - - u32 BlockWidth() const { - return block_width.Value(); - } - - 
u32 BlockHeight() const { - return block_height.Value(); - } - - u32 BlockDepth() const { - return block_depth.Value(); - } - }; - static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); + enum class CpuIndexWrap : u32 { + Wrap = 0, + NoWrap = 1, + }; + struct Surface { + RenderTargetFormat format; + MemoryLayout linear; union { - struct { - INSERT_UNION_PADDING_WORDS(0x80); + BitField<0, 4, u32> block_width; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_depth; + }; + u32 depth; + u32 layer; + u32 pitch; + u32 width; + u32 height; + u32 addr_upper; + u32 addr_lower; + + [[nodiscard]] constexpr GPUVAddr Address() const noexcept { + return (static_cast(addr_upper) << 32) | static_cast(addr_lower); + } + }; + static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); - Surface dst; + enum class SectorPromotion : u32 { + NoPromotion = 0, + PromoteTo2V = 1, + PromoteTo2H = 2, + PromoteTo4 = 3, + }; + + enum class NumTpcs : u32 { + All = 0, + One = 1, + }; - INSERT_UNION_PADDING_WORDS(2); + enum class RenderEnableMode : u32 { + False = 0, + True = 1, + Conditional = 2, + RenderIfEqual = 3, + RenderIfNotEqual = 4, + }; - Surface src; + enum class ColorKeyFormat : u32 { + A16R56G6B5 = 0, + A1R5G55B5 = 1, + A8R8G8B8 = 2, + A2R10G10B10 = 3, + Y8 = 4, + Y16 = 5, + Y32 = 6, + }; - INSERT_UNION_PADDING_WORDS(0x15); + union Beta4 { + BitField<0, 8, u32> b; + BitField<8, 8, u32> g; + BitField<16, 8, u32> r; + BitField<24, 8, u32> a; + }; - Operation operation; + struct Point { + u32 x; + u32 y; + }; - INSERT_UNION_PADDING_WORDS(0x177); + enum class PatternSelect : u32 { + MonoChrome8x8 = 0, + MonoChrome64x1 = 1, + MonoChrome1x64 = 2, + Color = 3, + }; + enum class NotifyType : u32 { + WriteOnly = 0, + WriteThenAwaken = 1, + }; + + enum class MonochromePatternColorFormat : u32 { + A8X8R8G6B5 = 0, + A1R5G5B5 = 1, + A8R8G8B8 = 2, + A8Y8 = 3, + A8X8Y16 = 4, + Y32 = 5, + }; + + enum class MonochromePatternFormat : u32 { + CGA6_M1 = 0, + LE_M1 
= 1, + }; + + union Regs { + static constexpr std::size_t NUM_REGS = 0x258; + struct { + u32 object; + INSERT_UNION_PADDING_WORDS(0x3F); + u32 no_operation; + NotifyType notify; + INSERT_UNION_PADDING_WORDS(0x2); + u32 wait_for_idle; + INSERT_UNION_PADDING_WORDS(0xB); + u32 pm_trigger; + INSERT_UNION_PADDING_WORDS(0xF); + u32 context_dma_notify; + u32 dst_context_dma; + u32 src_context_dma; + u32 semaphore_context_dma; + INSERT_UNION_PADDING_WORDS(0x1C); + Surface dst; + CpuIndexWrap pixels_from_cpu_index_wrap; + u32 kind2d_check_enable; + Surface src; + SectorPromotion pixels_from_memory_sector_promotion; + INSERT_UNION_PADDING_WORDS(0x1); + NumTpcs num_tpcs; + u32 render_enable_addr_upper; + u32 render_enable_addr_lower; + RenderEnableMode render_enable_mode; + INSERT_UNION_PADDING_WORDS(0x4); + u32 clip_x0; + u32 clip_y0; + u32 clip_width; + u32 clip_height; + BitField<0, 1, u32> clip_enable; + BitField<0, 3, ColorKeyFormat> color_key_format; + u32 color_key; + BitField<0, 1, u32> color_key_enable; + BitField<0, 8, u32> rop; + u32 beta1; + Beta4 beta4; + Operation operation; + union { + BitField<0, 6, u32> x; + BitField<8, 6, u32> y; + } pattern_offset; + BitField<0, 2, PatternSelect> pattern_select; + INSERT_UNION_PADDING_WORDS(0xC); + struct { + BitField<0, 3, MonochromePatternColorFormat> color_format; + BitField<0, 1, MonochromePatternFormat> format; + u32 color0; + u32 color1; + u32 pattern0; + u32 pattern1; + } monochrome_pattern; + struct { + std::array X8R8G8B8; + std::array R5G6B5; + std::array X1R5G5B5; + std::array Y8; + } color_pattern; + INSERT_UNION_PADDING_WORDS(0x10); + struct { + u32 prim_mode; + u32 prim_color_format; + u32 prim_color; + u32 line_tie_break_bits; + INSERT_UNION_PADDING_WORDS(0x14); + u32 prim_point_xy; + INSERT_UNION_PADDING_WORDS(0x7); + std::array prim_point; + } render_solid; + struct { + u32 data_type; + u32 color_format; + u32 index_format; + u32 mono_format; + u32 wrap; + u32 color0; + u32 color1; + u32 mono_opacity; + 
INSERT_UNION_PADDING_WORDS(0x6); + u32 src_width; + u32 src_height; + u32 dx_du_frac; + u32 dx_du_int; + u32 dx_dv_frac; + u32 dy_dv_int; + u32 dst_x0_frac; + u32 dst_x0_int; + u32 dst_y0_frac; + u32 dst_y0_int; + u32 data; + } pixels_from_cpu; + INSERT_UNION_PADDING_WORDS(0x3); + u32 big_endian_control; + INSERT_UNION_PADDING_WORDS(0x3); + struct { + BitField<0, 3, u32> block_shape; + BitField<0, 5, u32> corral_size; + BitField<0, 1, u32> safe_overlap; union { - u32 raw; BitField<0, 1, Origin> origin; BitField<4, 1, Filter> filter; - } blit_control; - + } sample_mode; INSERT_UNION_PADDING_WORDS(0x8); - - u32 blit_dst_x; - u32 blit_dst_y; - u32 blit_dst_width; - u32 blit_dst_height; - u64 blit_du_dx; - u64 blit_dv_dy; - u64 blit_src_x; - u64 blit_src_y; - - INSERT_UNION_PADDING_WORDS(0x21); - }; - std::array reg_array; + s32 dst_x0; + s32 dst_y0; + s32 dst_width; + s32 dst_height; + s64 du_dx; + s64 dv_dy; + s64 src_x0; + s64 src_y0; + } pixels_from_memory; }; + std::array reg_array; } regs{}; struct Config { - Operation operation{}; - Filter filter{}; - Common::Rectangle src_rect; - Common::Rectangle dst_rect; + Operation operation; + Filter filter; + s32 dst_x0; + s32 dst_y0; + s32 dst_x1; + s32 dst_y1; + s32 src_x0; + s32 src_y0; + s32 src_x1; + s32 src_y1; }; private: @@ -156,25 +303,49 @@ private: /// Performs the copy from the source surface to the destination surface as configured in the /// registers. 
- void HandleSurfaceCopy(); + void Blit(); }; #define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \ + static_assert(offsetof(Fermi2D::Regs, field_name) == position, \ "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(dst, 0x80); -ASSERT_REG_POSITION(src, 0x8C); -ASSERT_REG_POSITION(operation, 0xAB); -ASSERT_REG_POSITION(blit_control, 0x223); -ASSERT_REG_POSITION(blit_dst_x, 0x22c); -ASSERT_REG_POSITION(blit_dst_y, 0x22d); -ASSERT_REG_POSITION(blit_dst_width, 0x22e); -ASSERT_REG_POSITION(blit_dst_height, 0x22f); -ASSERT_REG_POSITION(blit_du_dx, 0x230); -ASSERT_REG_POSITION(blit_dv_dy, 0x232); -ASSERT_REG_POSITION(blit_src_x, 0x234); -ASSERT_REG_POSITION(blit_src_y, 0x236); +ASSERT_REG_POSITION(object, 0x0); +ASSERT_REG_POSITION(no_operation, 0x100); +ASSERT_REG_POSITION(notify, 0x104); +ASSERT_REG_POSITION(wait_for_idle, 0x110); +ASSERT_REG_POSITION(pm_trigger, 0x140); +ASSERT_REG_POSITION(context_dma_notify, 0x180); +ASSERT_REG_POSITION(dst_context_dma, 0x184); +ASSERT_REG_POSITION(src_context_dma, 0x188); +ASSERT_REG_POSITION(semaphore_context_dma, 0x18C); +ASSERT_REG_POSITION(dst, 0x200); +ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228); +ASSERT_REG_POSITION(kind2d_check_enable, 0x22C); +ASSERT_REG_POSITION(src, 0x230); +ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258); +ASSERT_REG_POSITION(num_tpcs, 0x260); +ASSERT_REG_POSITION(render_enable_addr_upper, 0x264); +ASSERT_REG_POSITION(render_enable_addr_lower, 0x268); +ASSERT_REG_POSITION(clip_x0, 0x280); +ASSERT_REG_POSITION(clip_y0, 0x284); +ASSERT_REG_POSITION(clip_width, 0x288); +ASSERT_REG_POSITION(clip_height, 0x28c); +ASSERT_REG_POSITION(clip_enable, 0x290); +ASSERT_REG_POSITION(color_key_format, 0x294); +ASSERT_REG_POSITION(color_key, 0x298); +ASSERT_REG_POSITION(rop, 0x2A0); +ASSERT_REG_POSITION(beta1, 0x2A4); +ASSERT_REG_POSITION(beta4, 0x2A8); +ASSERT_REG_POSITION(operation, 0x2AC); 
+ASSERT_REG_POSITION(pattern_offset, 0x2B0); +ASSERT_REG_POSITION(pattern_select, 0x2B4); +ASSERT_REG_POSITION(monochrome_pattern, 0x2E8); +ASSERT_REG_POSITION(color_pattern, 0x300); +ASSERT_REG_POSITION(render_solid, 0x580); +ASSERT_REG_POSITION(pixels_from_cpu, 0x800); +ASSERT_REG_POSITION(big_endian_control, 0x870); +ASSERT_REG_POSITION(pixels_from_memory, 0x880); #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 898370739..ba387506e 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun } } -Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { - const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); - ASSERT(cbuf_mask[regs.tex_cb_index]); - - const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index]; - ASSERT(texinfo.Address() != 0); - - const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle); - ASSERT(address < texinfo.Address() + texinfo.size); - - const Texture::TextureHandle tex_handle{memory_manager.Read(address)}; - return GetTextureInfo(tex_handle); -} - -Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const { - return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; -} - u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { ASSERT(stage == ShaderType::Compute); const auto& buffer = launch_description.const_buffer_config[const_buffer]; @@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { const Texture::TextureHandle tex_handle{handle}; - const Texture::FullTextureInfo tex_info = 
GetTextureInfo(tex_handle); - SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); - result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); + const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); + + SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); + result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); return result; } diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7f2500aab..51a041202 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -209,11 +209,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - Texture::FullTextureInfo GetTexture(std::size_t offset) const; - - /// Given a texture handle, returns the TSC and TIC entries. - Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 761962ed0..9be651e24 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include #include #include #include "common/assert.h" @@ -227,6 +226,10 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume OnMemoryWrite(); } return; + case MAXWELL3D_REG_INDEX(fragment_barrier): + return rasterizer->FragmentBarrier(); + case MAXWELL3D_REG_INDEX(tiled_cache_barrier): + return rasterizer->TiledCacheBarrier(); } } @@ -639,7 +642,7 @@ void Maxwell3D::FinishCBData() { } Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { - const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; + const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)}; Texture::TICEntry tic_entry; memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); @@ -648,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { } Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { - const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; + const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)}; Texture::TSCEntry tsc_entry; memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); return tsc_entry; } -Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const { - return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; -} - -Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const { - const auto stage_index = static_cast(stage); - const auto& shader = state.shader_stages[stage_index]; - const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; - ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); - - const GPUVAddr tex_info_address = - tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); - - ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); 
- - const Texture::TextureHandle tex_handle{memory_manager.Read(tex_info_address)}; - - return GetTextureInfo(tex_handle); -} - u32 Maxwell3D::GetRegisterValue(u32 method) const { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); return regs.reg_array[method]; } void Maxwell3D::ProcessClearBuffers() { - ASSERT(regs.clear_buffers.R == regs.clear_buffers.G && - regs.clear_buffers.R == regs.clear_buffers.B && - regs.clear_buffers.R == regs.clear_buffers.A); - rasterizer->Clear(); } @@ -692,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse ASSERT(stage != ShaderType::Compute); const auto& shader_stage = state.shader_stages[static_cast(stage)]; const auto& buffer = shader_stage.const_buffers[const_buffer]; - u32 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32)); - return result; + return memory_manager.Read(buffer.address + offset); } SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { @@ -712,9 +689,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { const Texture::TextureHandle tex_handle{handle}; - const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); - SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); - result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); + const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); + + SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); + result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); return result; } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 564acbc53..bf9e07c9b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -438,16 +438,6 @@ public: DecrWrapOGL = 0x8508, }; - enum 
class MemoryLayout : u32 { - Linear = 0, - BlockLinear = 1, - }; - - enum class InvMemoryLayout : u32 { - BlockLinear = 0, - Linear = 1, - }; - enum class CounterReset : u32 { SampleCnt = 0x01, Unk02 = 0x02, @@ -589,21 +579,31 @@ public: NegativeW = 7, }; + enum class SamplerIndex : u32 { + Independently = 0, + ViaHeaderIndex = 1, + }; + + struct TileMode { + union { + BitField<0, 4, u32> block_width; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_depth; + BitField<12, 1, u32> is_pitch_linear; + BitField<16, 1, u32> is_3d; + }; + }; + static_assert(sizeof(TileMode) == 4); + struct RenderTargetConfig { u32 address_high; u32 address_low; u32 width; u32 height; Tegra::RenderTargetFormat format; + TileMode tile_mode; union { - BitField<0, 3, u32> block_width; - BitField<4, 3, u32> block_height; - BitField<8, 3, u32> block_depth; - BitField<12, 1, InvMemoryLayout> type; - BitField<16, 1, u32> is_3d; - } memory_layout; - union { - BitField<0, 16, u32> layers; + BitField<0, 16, u32> depth; BitField<16, 1, u32> volume; }; u32 layer_stride; @@ -832,7 +832,11 @@ public: u32 patch_vertices; - INSERT_UNION_PADDING_WORDS(0xC); + INSERT_UNION_PADDING_WORDS(0x4); + + u32 fragment_barrier; + + INSERT_UNION_PADDING_WORDS(0x7); std::array scissor_test; @@ -842,7 +846,15 @@ public: u32 stencil_back_mask; u32 stencil_back_func_mask; - INSERT_UNION_PADDING_WORDS(0xC); + INSERT_UNION_PADDING_WORDS(0x5); + + u32 invalidate_texture_data_cache; + + INSERT_UNION_PADDING_WORDS(0x1); + + u32 tiled_cache_barrier; + + INSERT_UNION_PADDING_WORDS(0x4); u32 color_mask_common; @@ -866,12 +878,7 @@ public: u32 address_high; u32 address_low; Tegra::DepthFormat format; - union { - BitField<0, 4, u32> block_width; - BitField<4, 4, u32> block_height; - BitField<8, 4, u32> block_depth; - BitField<20, 1, InvMemoryLayout> type; - } memory_layout; + TileMode tile_mode; u32 layer_stride; GPUVAddr Address() const { @@ -880,7 +887,18 @@ public: } } zeta; - INSERT_UNION_PADDING_WORDS(0x41); + 
struct { + union { + BitField<0, 16, u32> x; + BitField<16, 16, u32> width; + }; + union { + BitField<0, 16, u32> y; + BitField<16, 16, u32> height; + }; + } render_area; + + INSERT_UNION_PADDING_WORDS(0x3F); union { BitField<0, 4, u32> stencil; @@ -921,7 +939,7 @@ public: BitField<25, 3, u32> map_7; }; - u32 GetMap(std::size_t index) const { + u32 Map(std::size_t index) const { const std::array maps{map_0, map_1, map_2, map_3, map_4, map_5, map_6, map_7}; ASSERT(index < maps.size()); @@ -934,11 +952,13 @@ public: u32 zeta_width; u32 zeta_height; union { - BitField<0, 16, u32> zeta_layers; + BitField<0, 16, u32> zeta_depth; BitField<16, 1, u32> zeta_volume; }; - INSERT_UNION_PADDING_WORDS(0x26); + SamplerIndex sampler_index; + + INSERT_UNION_PADDING_WORDS(0x25); u32 depth_test_enable; @@ -964,6 +984,7 @@ public: float b; float a; } blend_color; + INSERT_UNION_PADDING_WORDS(0x4); struct { @@ -1001,7 +1022,12 @@ public: float line_width_smooth; float line_width_aliased; - INSERT_UNION_PADDING_WORDS(0x1F); + INSERT_UNION_PADDING_WORDS(0x1B); + + u32 invalidate_sampler_cache_no_wfi; + u32 invalidate_texture_header_cache_no_wfi; + + INSERT_UNION_PADDING_WORDS(0x2); u32 vb_element_base; u32 vb_base_instance; @@ -1045,13 +1071,13 @@ public: } condition; struct { - u32 tsc_address_high; - u32 tsc_address_low; - u32 tsc_limit; + u32 address_high; + u32 address_low; + u32 limit; - GPUVAddr TSCAddress() const { - return static_cast( - (static_cast(tsc_address_high) << 32) | tsc_address_low); + GPUVAddr Address() const { + return static_cast((static_cast(address_high) << 32) | + address_low); } } tsc; @@ -1062,13 +1088,13 @@ public: u32 line_smooth_enable; struct { - u32 tic_address_high; - u32 tic_address_low; - u32 tic_limit; + u32 address_high; + u32 address_low; + u32 limit; - GPUVAddr TICAddress() const { - return static_cast( - (static_cast(tic_address_high) << 32) | tic_address_low); + GPUVAddr Address() const { + return static_cast((static_cast(address_high) << 32) | + 
address_low); } } tic; @@ -1397,12 +1423,6 @@ public: void FlushMMEInlineDraw(); - /// Given a texture handle, returns the TSC and TIC entries. - Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; - - /// Returns the texture information for a specific texture in a specific shader stage. - Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; @@ -1598,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370); ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); ASSERT_REG_POSITION(patch_vertices, 0x373); +ASSERT_REG_POSITION(fragment_barrier, 0x378); ASSERT_REG_POSITION(scissor_test, 0x380); ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); +ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD); +ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF); ASSERT_REG_POSITION(color_mask_common, 0x3E4); ASSERT_REG_POSITION(depth_bounds, 0x3E7); ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); @@ -1609,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED); ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); ASSERT_REG_POSITION(zeta, 0x3F8); +ASSERT_REG_POSITION(render_area, 0x3FD); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(fill_rectangle, 0x44F); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); @@ -1617,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); ASSERT_REG_POSITION(rt_control, 0x487); ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); -ASSERT_REG_POSITION(zeta_layers, 0x48c); +ASSERT_REG_POSITION(zeta_depth, 0x48c); 
+ASSERT_REG_POSITION(sampler_index, 0x48D); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); @@ -1641,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA); ASSERT_REG_POSITION(screen_y_control, 0x4EB); ASSERT_REG_POSITION(line_width_smooth, 0x4EC); ASSERT_REG_POSITION(line_width_aliased, 0x4ED); +ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509); +ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A); ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(vb_base_instance, 0x50E); ASSERT_REG_POSITION(clip_distance_enabled, 0x544); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 1c29e895e..ba750748c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -96,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() { } void MaxwellDMA::CopyBlockLinearToPitch() { + UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); UNIMPLEMENTED_IF(regs.src_params.layer != 0); @@ -135,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { } void MaxwellDMA::CopyPitchToBlockLinear() { + UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); + const auto& dst_params = regs.dst_params; const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; const u32 width = dst_params.width; diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index c5f26896e..3512283ff 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "core/core.h" +#include "video_core/delayed_destruction_ring.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" @@ -47,6 +48,11 @@ protected: template class FenceManager { public: + /// 
Notify the fence manager about a new frame + void TickFrame() { + delayed_destruction_ring.Tick(); + } + void SignalSemaphore(GPUVAddr addr, u32 value) { TryReleasePendingFences(); const bool should_flush = ShouldFlush(); @@ -86,7 +92,7 @@ public: } else { gpu.IncrementSyncPoint(current_fence->GetPayload()); } - fences.pop(); + PopFence(); } } @@ -132,7 +138,7 @@ private: } else { gpu.IncrementSyncPoint(current_fence->GetPayload()); } - fences.pop(); + PopFence(); } } @@ -158,7 +164,14 @@ private: query_cache.CommitAsyncFlushes(); } + void PopFence() { + delayed_destruction_ring.Push(std::move(fences.front())); + fences.pop(); + } + std::queue fences; + + DelayedDestructionRing delayed_destruction_ring; }; } // namespace VideoCommon diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 6e70bd362..65feff588 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -57,7 +57,10 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } // Flush and invalidate through the GPU interface, to be asynchronous if possible. - system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size); + const std::optional cpu_addr = GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + + rasterizer->UnmapMemory(*cpu_addr, size); UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); } diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 9da9fb4ff..e69de29bb 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -1,250 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/morton.h" -#include "video_core/surface.h" -#include "video_core/textures/decoders.h" - -namespace VideoCore { - -using Surface::GetBytesPerPixel; -using Surface::PixelFormat; - -using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*); -using ConversionArray = std::array; - -template -static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, - u32 tile_width_spacing, u8* buffer, u8* addr) { - constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); - - // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual - // pixel values. - constexpr u32 tile_size_x{GetDefaultBlockWidth(format)}; - constexpr u32 tile_size_y{GetDefaultBlockHeight(format)}; - - if constexpr (morton_to_linear) { - Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, - stride, height, depth, block_height, block_depth, - tile_width_spacing); - } else { - Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, - (height + tile_size_y - 1) / tile_size_y, depth, - bytes_per_pixel, bytes_per_pixel, addr, buffer, false, - block_height, block_depth, tile_width_spacing); - } -} - -static constexpr ConversionArray morton_to_linear_fns = { - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - 
MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, -}; - -static constexpr ConversionArray linear_to_morton_fns = { - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - // TODO(Subv): Swizzling ASTC formats are not supported - nullptr, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - nullptr, - nullptr, - nullptr, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, -}; - -static MortonCopyFn 
GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { - switch (mode) { - case MortonSwizzleMode::MortonToLinear: - return morton_to_linear_fns[static_cast(format)]; - case MortonSwizzleMode::LinearToMorton: - return linear_to_morton_fns[static_cast(format)]; - } - UNREACHABLE(); - return morton_to_linear_fns[static_cast(format)]; -} - -void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, - u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, - u8* buffer, u8* addr) { - GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, - tile_width_spacing, buffer, addr); -} - -} // namespace VideoCore diff --git a/src/video_core/morton.h b/src/video_core/morton.h index b714a7e3f..e69de29bb 100644 --- a/src/video_core/morton.h +++ b/src/video_core/morton.h @@ -1,18 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" -#include "video_core/surface.h" - -namespace VideoCore { - -enum class MortonSwizzleMode { MortonToLinear, LinearToMorton }; - -void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, - u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, - u8* buffer, u8* addr); - -} // namespace VideoCore diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 27ef4c69a..0cb0f387d 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -76,6 +76,9 @@ public: /// Sync memory between guest and host. 
virtual void SyncGuestHost() = 0; + /// Unmap memory range + virtual void UnmapMemory(VAddr addr, u64 size) = 0; + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; @@ -83,6 +86,12 @@ public: /// Notify the host renderer to wait for previous primitive and compute operations. virtual void WaitForIdle() = 0; + /// Notify the host renderer to wait for reads and writes to render targets and flush caches. + virtual void FragmentBarrier() = 0; + + /// Notify the host renderer to make available previous render target writes. + virtual void TiledCacheBarrier() = 0; + /// Notify the rasterizer to send all written commands to the host GPU. virtual void FlushCommands() = 0; @@ -91,8 +100,7 @@ public: /// Attempt to use a faster method to perform a surface copy [[nodiscard]] virtual bool AccelerateSurfaceCopy( - const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { return false; } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 60735d502..5772cad87 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -61,10 +61,9 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - const Device& device_, std::size_t stream_size_) - : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, - std::make_unique(device_, stream_size_, true)}, - device{device_} { + const Device& device_, OGLStreamBuffer& stream_buffer_, + StateTracker& state_tracker) + : 
GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} { if (!device.HasFastBufferSubData()) { return; } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 95251e26b..17ee90316 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -22,6 +22,7 @@ namespace OpenGL { class Device; class OGLStreamBuffer; class RasterizerOpenGL; +class StateTracker; class Buffer : public VideoCommon::BufferBlock { public: @@ -52,9 +53,10 @@ private: using GenericBufferCache = VideoCommon::BufferCache; class OGLBufferCache final : public GenericBufferCache { public: - explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - const Device& device_, std::size_t stream_size_); + explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, + const Device& device, OGLStreamBuffer& stream_buffer, + StateTracker& state_tracker); ~OGLBufferCache(); BufferInfo GetEmptyBuffer(std::size_t) override; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a94e4f72e..b24179d59 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -5,9 +5,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1; constexpr u32 NumStages = 5; -constexpr std::array LimitUBOs = { +constexpr std::array LIMIT_UBOS = { GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, - GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS}; - -constexpr std::array LimitSSBOs = { + GL_MAX_FRAGMENT_UNIFORM_BLOCKS, 
GL_MAX_COMPUTE_UNIFORM_BLOCKS, +}; +constexpr std::array LIMIT_SSBOS = { GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS}; - -constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, - GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_TEXTURE_IMAGE_UNITS, - GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS}; - -constexpr std::array LimitImages = { + GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, +}; +constexpr std::array LIMIT_SAMPLERS = { + GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, + GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, + GL_MAX_TEXTURE_IMAGE_UNITS, + GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, +}; +constexpr std::array LIMIT_IMAGES = { GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, - GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS}; + GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, +}; template T GetInteger(GLenum pname) { @@ -76,8 +80,8 @@ std::vector GetExtensions() { return extensions; } -bool HasExtension(const std::vector& images, std::string_view extension) { - return std::find(images.begin(), images.end(), extension) != images.end(); +bool HasExtension(std::span extensions, std::string_view extension) { + return std::ranges::find(extensions, extension) != extensions.end(); } u32 Extract(u32& base, u32& num, u32 amount, std::optional limit = {}) { @@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional limit = {}) { std::array BuildMaxUniformBuffers() noexcept { std::array max; - std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(), 
- [](GLenum pname) { return GetInteger(pname); }); + std::ranges::transform(LIMIT_UBOS, max.begin(), + [](GLenum pname) { return GetInteger(pname); }); return max; } @@ -115,9 +119,10 @@ std::array BuildBaseBindin for (std::size_t i = 0; i < NumStages; ++i) { const std::size_t stage = stage_swizzle[i]; bindings[stage] = { - Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]), - Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]), - Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])}; + Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), + Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), + Extract(base_samplers, num_samplers, total_samplers / NumStages, + LIMIT_SAMPLERS[stage])}; } u32 num_images = GetInteger(GL_MAX_IMAGE_UNITS); @@ -130,7 +135,7 @@ std::array BuildBaseBindin // Reserve at least 4 image bindings on the fragment stage. bindings[4].image = - Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]); + Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); // This is guaranteed to be at least 1. const u32 total_extracted_images = num_images / (NumStages - 1); @@ -142,7 +147,7 @@ std::array BuildBaseBindin continue; } bindings[stage].image = - Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); + Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); } // Compute doesn't care about any of this. @@ -188,6 +193,11 @@ bool IsASTCSupported() { return true; } +[[nodiscard]] bool IsDebugToolAttached(std::span extensions) { + const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); + return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); +} + } // Anonymous namespace Device::Device() @@ -206,9 +216,8 @@ Device::Device() "Beta driver 443.24 is known to have issues. 
There might be performance issues."); disable_fast_buffer_sub_data = true; } - - uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); - shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); + uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); + shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); max_compute_shared_memory_size = GetInteger(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); @@ -224,6 +233,7 @@ Device::Device() has_precise_bug = TestPreciseBug(); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; + has_debugging_tool_attached = IsDebugToolAttached(extensions); // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8a4b6b9fc..13e66846c 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -36,11 +36,11 @@ public: return GetBaseBindings(static_cast(shader_type)); } - std::size_t GetUniformBufferAlignment() const { + size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } - std::size_t GetShaderStorageBufferAlignment() const { + size_t GetShaderStorageBufferAlignment() const { return shader_storage_alignment; } @@ -104,6 +104,10 @@ public: return has_nv_viewport_array2; } + bool HasDebuggingToolAttached() const { + return has_debugging_tool_attached; + } + bool UseAssemblyShaders() const { return use_assembly_shaders; } @@ -118,8 +122,8 @@ private: std::array max_uniform_buffers{}; std::array base_bindings{}; - std::size_t uniform_buffer_alignment{}; - std::size_t shader_storage_alignment{}; + size_t uniform_buffer_alignment{}; 
+ size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; u32 max_compute_shared_memory_size{}; @@ -135,6 +139,7 @@ private: bool has_precise_bug{}; bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; + bool has_debugging_tool_attached{}; bool use_assembly_shaders{}; bool use_asynchronous_shaders{}; }; diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 6040646cb..3e9c922f5 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -46,7 +46,7 @@ void GLInnerFence::Wait() { } FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, - Tegra::GPU& gpu_, TextureCacheOpenGL& texture_cache_, + Tegra::GPU& gpu_, TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, QueryCache& query_cache_) : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index 39ca6125b..30dbee613 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h @@ -33,12 +33,12 @@ private: using Fence = std::shared_ptr; using GenericFenceManager = - VideoCommon::FenceManager; + VideoCommon::FenceManager; class FenceManagerOpenGL final : public GenericFenceManager { public: explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, - TextureCacheOpenGL& texture_cache_, OGLBufferCache& buffer_cache_, + TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, QueryCache& query_cache_); protected: diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp deleted file mode 100644 index b8a512cb6..000000000 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp +++ /dev/null @@ -1,85 
+0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_framebuffer_cache.h" - -namespace OpenGL { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using VideoCore::Surface::SurfaceType; - -FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default; - -FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default; - -GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) { - const auto [entry, is_cache_miss] = cache.try_emplace(key); - auto& framebuffer{entry->second}; - if (is_cache_miss) { - framebuffer = CreateFramebuffer(key); - } - return framebuffer.handle; -} - -OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) { - OGLFramebuffer framebuffer; - framebuffer.Create(); - - // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); - - if (key.zeta) { - const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil; - const GLenum attach_target = stencil ? 
GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT; - key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER); - } - - std::size_t num_buffers = 0; - std::array targets; - - for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { - if (!key.colors[index]) { - targets[index] = GL_NONE; - continue; - } - const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast(index); - key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER); - - const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111; - targets[index] = GL_COLOR_ATTACHMENT0 + attachment; - num_buffers = index + 1; - } - - if (num_buffers > 0) { - glDrawBuffers(static_cast(num_buffers), std::data(targets)); - } else { - glDrawBuffer(GL_NONE); - } - - return framebuffer; -} - -std::size_t FramebufferCacheKey::Hash() const noexcept { - std::size_t hash = std::hash{}(zeta); - for (const auto& color : colors) { - hash ^= std::hash{}(color); - } - hash ^= static_cast(color_attachments) << 16; - return hash; -} - -bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept { - return std::tie(colors, zeta, color_attachments) == - std::tie(rhs.colors, rhs.zeta, rhs.color_attachments); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h deleted file mode 100644 index 8f698fee0..000000000 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include - -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_texture_cache.h" - -namespace OpenGL { - -constexpr std::size_t BitsPerAttachment = 4; - -struct FramebufferCacheKey { - View zeta; - std::array colors; - u32 color_attachments = 0; - - std::size_t Hash() const noexcept; - - bool operator==(const FramebufferCacheKey& rhs) const noexcept; - - bool operator!=(const FramebufferCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - void SetAttachment(std::size_t index, u32 attachment) { - color_attachments |= attachment << (BitsPerAttachment * index); - } -}; - -} // namespace OpenGL - -namespace std { - -template <> -struct hash { - std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace OpenGL { - -class FramebufferCacheOpenGL { -public: - FramebufferCacheOpenGL(); - ~FramebufferCacheOpenGL(); - - GLuint GetFramebuffer(const FramebufferCacheKey& key); - -private: - OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); - - std::unordered_map cache; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e58e84759..8aa63d329 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -25,12 +25,15 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" #include 
"video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/shader_cache.h" +#include "video_core/texture_cache/texture_cache.h" namespace OpenGL { @@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255 namespace { -constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18; -constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = +constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18; +constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; -constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = +constexpr size_t TOTAL_CONST_BUFFER_BYTES = NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; -constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; +constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; +constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; + +constexpr size_t MAX_TEXTURES = 192; +constexpr size_t MAX_IMAGES = 48; + +struct TextureHandle { + constexpr TextureHandle(u32 data, bool via_header_index) { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? 
image : handle.tsc_id.Value(); + } + + u32 image; + u32 sampler; +}; template -Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - ShaderType shader_type, std::size_t index = 0) { +TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, + ShaderType shader_type, size_t index = 0) { if constexpr (std::is_same_v) { if (entry.is_separated) { const u32 buffer_1 = entry.buffer; @@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry const u32 offset_2 = entry.secondary_offset; const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return engine.GetTextureInfo(handle_1 | handle_2); + return TextureHandle(handle_1 | handle_2, via_header_index); } } if (entry.is_bindless) { - const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return engine.GetTextureInfo(handle); - } - - const auto& gpu_profile = engine.AccessGuestDriverProfile(); - const u32 offset = entry.offset + static_cast(index * gpu_profile.GetTextureHandlerSize()); - if constexpr (std::is_same_v) { - return engine.GetStageTexture(shader_type, offset); - } else { - return engine.GetTexture(offset); + const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); + return TextureHandle(raw, via_header_index); } + const u32 buffer = engine.GetBoundBuffer(); + const u64 offset = (entry.offset + index) * sizeof(u32); + return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); } std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, @@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, if (!entry.IsIndirect()) { return entry.GetSize(); } - if (buffer.size > Maxwell::MaxConstBufferSize) { LOG_WARNING(Render_OpenGL, "Indirect 
constbuffer size {} exceeds maximum {}", buffer.size, Maxwell::MaxConstBufferSize); @@ -147,23 +158,60 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss reinterpret_cast(ssbos)); } +ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { + if (entry.is_buffer) { + return ImageViewType::Buffer; + } + switch (entry.type) { + case Tegra::Shader::TextureType::Texture1D: + return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; + case Tegra::Shader::TextureType::Texture2D: + return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; + case Tegra::Shader::TextureType::Texture3D: + return ImageViewType::e3D; + case Tegra::Shader::TextureType::TextureCube: + return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; + } + UNREACHABLE(); + return ImageViewType::e2D; +} + +ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { + switch (entry.type) { + case Tegra::Shader::ImageType::Texture1D: + return ImageViewType::e1D; + case Tegra::Shader::ImageType::Texture1DArray: + return ImageViewType::e1DArray; + case Tegra::Shader::ImageType::Texture2D: + return ImageViewType::e2D; + case Tegra::Shader::ImageType::Texture2DArray: + return ImageViewType::e2DArray; + case Tegra::Shader::ImageType::Texture3D: + return ImageViewType::e3D; + case Tegra::Shader::ImageType::TextureBuffer: + return ImageViewType::Buffer; + } + UNREACHABLE(); + return ImageViewType::e2D; +} + } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Core::Memory::Memory& cpu_memory_, const Device& device_, ScreenInfo& screen_info_, ProgramManager& program_manager_, StateTracker& state_tracker_) - : RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()), + : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), 
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), - texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker), + stream_buffer(device, state_tracker), + texture_cache_runtime(device, program_manager, state_tracker), + texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), query_cache(*this, maxwell3d, gpu_memory), - buffer_cache(*this, gpu_memory, cpu_memory_, device, STREAM_BUFFER_SIZE), + buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), async_shaders(emu_window_) { - CheckExtensions(); - unified_uniform_buffer.Create(); glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); @@ -174,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra nullptr, 0); } } - if (device.UseAsynchronousShaders()) { async_shaders.AllocateWorkers(); } @@ -186,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() { } } -void RasterizerOpenGL::CheckExtensions() { - if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { - LOG_WARNING( - Render_OpenGL, - "Anisotropic filter is not supported! 
This can cause graphical issues in some games."); - } -} - void RasterizerOpenGL::SetupVertexFormat() { auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::VertexFormats]) { @@ -316,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { return info.offset; } -void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { +void RasterizerOpenGL::SetupShaders() { MICROPROFILE_SCOPE(OpenGL_Shader); u32 clip_distances = 0; + std::array shaders{}; + image_view_indices.clear(); + sampler_handles.clear(); + + texture_cache.SynchronizeGraphicsDescriptors(); + for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = maxwell3d.regs.shader_config[index]; const auto program{static_cast(index)}; @@ -338,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } continue; } - // Currently this stages are not supported in the OpenGL backend. // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL if (program == Maxwell::ShaderProgram::TesselationControl || @@ -347,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; switch (program) { case Maxwell::ShaderProgram::VertexA: @@ -363,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { default: UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, shader_config.enable.Value(), shader_config.offset); + break; } // Stage indices are 0 - 5 - const std::size_t stage = index == 0 ? 0 : index - 1; + const size_t stage = index == 0 ? 0 : index - 1; + shaders[stage] = shader; + SetupDrawConstBuffers(stage, shader); SetupDrawGlobalMemory(stage, shader); - SetupDrawTextures(stage, shader); - SetupDrawImages(stage, shader); + SetupDrawTextures(shader, stage); + SetupDrawImages(shader, stage); // Workaround for Intel drivers. 
// When a clip distance is enabled but not set in the shader it crops parts of the screen @@ -384,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { ++index; } } - SyncClipEnabled(clip_distances); maxwell3d.dirty.flags[Dirty::Shaders] = false; + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + size_t image_view_index = 0; + size_t texture_index = 0; + size_t image_index = 0; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader* const shader = shaders[stage]; + if (shader) { + const auto base = device.GetBaseBindings(stage); + BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, + texture_index, image_index); + } + } } std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -417,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s shader_cache.LoadDiskCache(title_id, stop_loading, callback); } -void RasterizerOpenGL::ConfigureFramebuffers() { - MICROPROFILE_SCOPE(OpenGL_Framebuffer); - if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) { - return; - } - maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; - - texture_cache.GuardRenderTargets(true); - - View depth_surface = texture_cache.GetDepthBufferSurface(true); - - const auto& regs = maxwell3d.regs; - UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); - - // Bind the framebuffer surfaces - FramebufferCacheKey key; - const auto colors_count = static_cast(regs.rt_control.count); - for (std::size_t index = 0; index < colors_count; ++index) { - View color_surface{texture_cache.GetColorBufferSurface(index, true)}; - if (!color_surface) { - continue; - } - // Assume that a surface will be written to if it is used as a framebuffer, even - // if the shader doesn't actually write to it. 
- texture_cache.MarkColorBufferInUse(index); - - key.SetAttachment(index, regs.rt_control.GetMap(index)); - key.colors[index] = std::move(color_surface); - } - - if (depth_surface) { - // Assume that a surface will be written to if it is used as a framebuffer, even if - // the shader doesn't actually write to it. - texture_cache.MarkDepthBufferInUse(); - key.zeta = std::move(depth_surface); - } - - texture_cache.GuardRenderTargets(false); - - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); -} - -void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) { - const auto& regs = maxwell3d.regs; - - texture_cache.GuardRenderTargets(true); - View color_surface; - - if (using_color) { - // Determine if we have to preserve the contents. - // First we have to make sure all clear masks are enabled. - bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G || - !regs.clear_buffers.B || !regs.clear_buffers.A; - const std::size_t index = regs.clear_buffers.RT; - if (regs.clear_flags.scissor) { - // Then we have to confirm scissor testing clears the whole image. - const auto& scissor = regs.scissor_test[0]; - preserve_contents |= scissor.min_x > 0; - preserve_contents |= scissor.min_y > 0; - preserve_contents |= scissor.max_x < regs.rt[index].width; - preserve_contents |= scissor.max_y < regs.rt[index].height; - } - - color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents); - texture_cache.MarkColorBufferInUse(index); - } - - View depth_surface; - if (using_depth_stencil) { - bool preserve_contents = false; - if (regs.clear_flags.scissor) { - // For depth stencil clears we only have to confirm scissor test covers the whole image. 
- const auto& scissor = regs.scissor_test[0]; - preserve_contents |= scissor.min_x > 0; - preserve_contents |= scissor.min_y > 0; - preserve_contents |= scissor.max_x < regs.zeta_width; - preserve_contents |= scissor.max_y < regs.zeta_height; - } - - depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); - texture_cache.MarkDepthBufferInUse(); - } - texture_cache.GuardRenderTargets(false); - - FramebufferCacheKey key; - key.colors[0] = std::move(color_surface); - key.zeta = std::move(depth_surface); - - state_tracker.NotifyFramebuffer(); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); -} - void RasterizerOpenGL::Clear() { if (!maxwell3d.ShouldExecute()) { return; @@ -523,8 +491,9 @@ void RasterizerOpenGL::Clear() { regs.clear_buffers.A) { use_color = true; - state_tracker.NotifyColorMask0(); - glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, + const GLuint index = regs.clear_buffers.RT; + state_tracker.NotifyColorMask(index); + glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); // TODO(Rodrigo): Determine if clamping is used on clears @@ -557,15 +526,17 @@ void RasterizerOpenGL::Clear() { state_tracker.NotifyScissor0(); glDisablei(GL_SCISSOR_TEST, 0); } - UNIMPLEMENTED_IF(regs.clear_flags.viewport); - ConfigureClearFramebuffer(use_color, use_depth || use_stencil); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.UpdateRenderTargets(true); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + } if (use_color) { - glClearBufferfv(GL_COLOR, 0, regs.clear_color); + glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); } - if (use_depth && use_stencil) { glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); } else if (use_depth) { @@ -622,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { (Maxwell::MaxConstBufferSize + 
device.GetUniformBufferAlignment()); // Prepare the vertex array. - const bool invalidated = buffer_cache.Map(buffer_size); - - if (invalidated) { - // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty - auto& dirty = maxwell3d.dirty.flags; - dirty[Dirty::VertexBuffers] = true; - for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { - dirty[index] = true; - } - } + buffer_cache.Map(buffer_size); // Prepare vertex array format. SetupVertexFormat(); @@ -655,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } // Setup shaders and their used resources. - texture_cache.GuardSamplers(true); - const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); - SetupShaders(primitive_mode); - texture_cache.GuardSamplers(false); - - ConfigureFramebuffers(); + auto lock = texture_cache.AcquireLock(); + SetupShaders(); // Signal the buffer cache that we are not going to upload more things. 
buffer_cache.Unmap(); - + texture_cache.UpdateRenderTargets(false); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); program_manager.BindGraphicsPipeline(); - if (texture_cache.TextureBarrier()) { - glTextureBarrier(); - } - + const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); BeginTransformFeedback(primitive_mode); const GLuint base_instance = static_cast(maxwell3d.regs.vb_base_instance); @@ -722,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Acquire(); current_cbuf = 0; - auto kernel = shader_cache.GetComputeKernel(code_addr); - program_manager.BindCompute(kernel->GetHandle()); + Shader* const kernel = shader_cache.GetComputeKernel(code_addr); - SetupComputeTextures(kernel); - SetupComputeImages(kernel); + auto lock = texture_cache.AcquireLock(); + BindComputeTextures(kernel); - const std::size_t buffer_size = - Tegra::Engines::KeplerCompute::NumConstBuffers * - (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); + const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * + (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); buffer_cache.Map(buffer_size); SetupComputeConstBuffers(kernel); @@ -739,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Unmap(); const auto& launch_desc = kepler_compute.launch_description; - program_manager.BindCompute(kernel->GetHandle()); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; } @@ -760,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.FlushRegion(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.DownloadMemory(addr, size); + } buffer_cache.FlushRegion(addr, size); query_cache.FlushRegion(addr, size); } @@ -769,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr 
addr, u64 size) { if (!Settings::IsGPULevelHigh()) { return buffer_cache.MustFlushRegion(addr, size); } - return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); + return texture_cache.IsRegionGpuModified(addr, size) || + buffer_cache.MustFlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { @@ -777,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.InvalidateRegion(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.WriteMemory(addr, size); + } shader_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size); @@ -788,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.OnCPUWrite(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.WriteMemory(addr, size); + } shader_cache.OnCPUWrite(addr, size); buffer_cache.OnCPUWrite(addr, size); } void RasterizerOpenGL::SyncGuestHost() { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - texture_cache.SyncGuestHost(); buffer_cache.SyncGuestHost(); shader_cache.SyncGuestHost(); } +void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { + { + auto lock = texture_cache.AcquireLock(); + texture_cache.UnmapMemory(addr, size); + } + buffer_cache.OnCPUWrite(addr, size); + shader_cache.OnCPUWrite(addr, size); +} + void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write(addr, value); @@ -841,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() { GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); } +void RasterizerOpenGL::FragmentBarrier() { + glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); +} + +void RasterizerOpenGL::TiledCacheBarrier() { + glTextureBarrier(); +} + void RasterizerOpenGL::FlushCommands() { // Only flush when we have commands queued 
to OpenGL. if (num_queued_commands == 0) { @@ -854,45 +833,95 @@ void RasterizerOpenGL::TickFrame() { // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. num_queued_commands = 0; + fence_manager.TickFrame(); buffer_cache.TickFrame(); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.TickFrame(); + } } -bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, +bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { MICROPROFILE_SCOPE(OpenGL_Blits); - texture_cache.DoFermiCopy(src, dst, copy_config); + auto lock = texture_cache.AcquireLock(); + texture_cache.BlitImage(dst, src, copy_config); return true; } bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) { - if (!framebuffer_addr) { - return {}; + if (framebuffer_addr == 0) { + return false; } - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; - if (!surface) { - return {}; + auto lock = texture_cache.AcquireLock(); + ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; + if (!image_view) { + return false; } - // Verify that the cached surface is the same size and format as the requested framebuffer - const auto& params{surface->GetSurfaceParams()}; - const auto& pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; - ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); - ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); + // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); + // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer 
height is different"); - if (params.pixel_format != pixel_format) { - LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different"); - } + screen_info.display_texture = image_view->Handle(ImageViewType::e2D); + screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); + return true; +} - screen_info.display_texture = surface->GetTexture(); - screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion; +void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { + image_view_indices.clear(); + sampler_handles.clear(); - return true; + texture_cache.SynchronizeComputeDescriptors(); + + SetupComputeTextures(kernel); + SetupComputeImages(kernel); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + program_manager.BindCompute(kernel->GetHandle()); + size_t image_view_index = 0; + size_t texture_index = 0; + size_t image_index = 0; + BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); +} + +void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, + GLuint base_image, size_t& image_view_index, + size_t& texture_index, size_t& image_index) { + const GLuint* const samplers = sampler_handles.data() + texture_index; + const GLuint* const textures = texture_handles.data() + texture_index; + const GLuint* const images = image_handles.data() + image_index; + + const size_t num_samplers = entries.samplers.size(); + for (const auto& sampler : entries.samplers) { + for (size_t i = 0; i < sampler.size; ++i) { + const ImageViewId image_view_id = image_view_ids[image_view_index++]; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); + texture_handles[texture_index++] = handle; + } + } + const size_t num_images = entries.images.size(); + for (size_t unit = 0; unit < 
num_images; ++unit) { + // TODO: Mark as modified + const ImageViewId image_view_id = image_view_ids[image_view_index++]; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); + image_handles[image_index] = handle; + ++image_index; + } + if (num_samplers > 0) { + glBindSamplers(base_texture, static_cast(num_samplers), samplers); + glBindTextures(base_texture, static_cast(num_samplers), textures); + } + if (num_images > 0) { + glBindImageTextures(base_image, static_cast(num_images), images); + } } void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { @@ -999,7 +1028,6 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; - const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; const auto& entries{shader->GetEntries().global_memory_entries}; @@ -1056,77 +1084,53 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e } } -void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { - MICROPROFILE_SCOPE(OpenGL_Texture); - u32 binding = device.GetBaseBindings(stage_index).sampler; +void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { + const bool via_header_index = + maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : shader->GetEntries().samplers) { const auto shader_type = static_cast(stage_index); - for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); - SetupTexture(binding++, texture, entry); + for (size_t index = 0; index < entry.size; ++index) { + const auto handle = + GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); + const Sampler* const 
sampler = texture_cache.GetGraphicsSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); + image_view_indices.push_back(handle.image); } } } -void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { - MICROPROFILE_SCOPE(OpenGL_Texture); - u32 binding = 0; +void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : kernel->GetEntries().samplers) { - for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i); - SetupTexture(binding++, texture, entry); + for (size_t i = 0; i < entry.size; ++i) { + const auto handle = + GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); + const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); + image_view_indices.push_back(handle.image); } } } -void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, - const SamplerEntry& entry) { - const auto view = texture_cache.GetTextureSurface(texture.tic, entry); - if (!view) { - // Can occur when texture addr is null or its memory is unmapped/invalid - glBindSampler(binding, 0); - glBindTextureUnit(binding, 0); - return; - } - const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source, - texture.tic.z_source, texture.tic.w_source); - glBindTextureUnit(binding, handle); - if (!view->GetSurfaceParams().IsBuffer()) { - glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); - } -} - -void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) { - u32 binding = device.GetBaseBindings(stage_index).image; +void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { + const bool via_header_index = + maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& 
entry : shader->GetEntries().images) { const auto shader_type = static_cast(stage_index); - const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; - SetupImage(binding++, tic, entry); + const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); + image_view_indices.push_back(handle.image); } } -void RasterizerOpenGL::SetupComputeImages(Shader* shader) { - u32 binding = 0; +void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : shader->GetEntries().images) { - const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic; - SetupImage(binding++, tic, entry); + const auto handle = + GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); + image_view_indices.push_back(handle.image); } } -void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, - const ImageEntry& entry) { - const auto view = texture_cache.GetImageSurface(tic, entry); - if (!view) { - glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); - return; - } - if (entry.is_written) { - view->MarkAsModified(texture_cache.Tick()); - } - const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source); - glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat()); -} - void RasterizerOpenGL::SyncViewport() { auto& flags = maxwell3d.dirty.flags; const auto& regs = maxwell3d.regs; @@ -1526,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() { flags[Dirty::PointSize] = false; oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); + oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); - if (maxwell3d.regs.vp_point_size.enable) { - // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled. 
- glEnable(GL_PROGRAM_POINT_SIZE); - return; - } - - // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid - // in OpenGL). glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); - glDisable(GL_PROGRAM_POINT_SIZE); } void RasterizerOpenGL::SyncLineState() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index de28cff15..82e03e677 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -7,12 +7,13 @@ #include #include #include -#include #include #include #include #include +#include + #include #include "common/common_types.h" @@ -23,16 +24,14 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_fence_manager.h" -#include "video_core/renderer_opengl/gl_framebuffer_cache.h" #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/renderer_opengl/gl_stream_buffer.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/renderer_opengl/utils.h" #include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" @@ -51,7 +50,7 @@ class MemoryManager; namespace OpenGL { struct ScreenInfo; -struct DrawParameters; +struct ShaderEntries; struct BindlessSSBO { GLuint64EXT address; @@ -79,15 +78,18 @@ public: void InvalidateRegion(VAddr addr, u64 size) override; void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; + void UnmapMemory(VAddr addr, u64 size) override; void 
SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitForIdle() override; + void FragmentBarrier() override; + void TiledCacheBarrier() override; void FlushCommands() override; void TickFrame() override; - bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, + bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; @@ -108,11 +110,14 @@ public: } private: - /// Configures the color and depth framebuffer states. - void ConfigureFramebuffers(); + static constexpr size_t MAX_TEXTURES = 192; + static constexpr size_t MAX_IMAGES = 48; + static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; + + void BindComputeTextures(Shader* kernel); - /// Configures the color and depth framebuffer for clearing. - void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); + void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, + size_t& image_view_index, size_t& texture_index, size_t& image_index); /// Configures the current constbuffers to use for the draw command. void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); @@ -136,23 +141,16 @@ private: size_t size, BindlessSSBO* ssbo); /// Configures the current textures to use for the draw command. - void SetupDrawTextures(std::size_t stage_index, Shader* shader); + void SetupDrawTextures(const Shader* shader, size_t stage_index); /// Configures the textures used in a compute shader. - void SetupComputeTextures(Shader* kernel); - - /// Configures a texture. 
- void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, - const SamplerEntry& entry); + void SetupComputeTextures(const Shader* kernel); /// Configures images in a graphics shader. - void SetupDrawImages(std::size_t stage_index, Shader* shader); + void SetupDrawImages(const Shader* shader, size_t stage_index); /// Configures images in a compute shader. - void SetupComputeImages(Shader* shader); - - /// Configures an image. - void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); + void SetupComputeImages(const Shader* shader); /// Syncs the viewport and depth range to match the guest state void SyncViewport(); @@ -227,9 +225,6 @@ private: /// End a transform feedback void EndTransformFeedback(); - /// Check for extension that are not strictly required but are needed for correct emulation - void CheckExtensions(); - std::size_t CalculateVertexArraysSize() const; std::size_t CalculateIndexBufferSize() const; @@ -242,7 +237,7 @@ private: GLintptr SetupIndexBuffer(); - void SetupShaders(GLenum primitive_mode); + void SetupShaders(); Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; @@ -254,19 +249,21 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; - TextureCacheOpenGL texture_cache; + OGLStreamBuffer stream_buffer; + TextureCacheRuntime texture_cache_runtime; + TextureCache texture_cache; ShaderCacheOpenGL shader_cache; - SamplerCacheOpenGL sampler_cache; - FramebufferCacheOpenGL framebuffer_cache; QueryCache query_cache; OGLBufferCache buffer_cache; FenceManagerOpenGL fence_manager; VideoCommon::Shader::AsyncShaders async_shaders; - static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; - - GLint vertex_binding = 0; + boost::container::static_vector image_view_indices; + std::array image_view_ids; + boost::container::static_vector sampler_handles; + std::array texture_handles; + std::array image_handles; std::array transform_feedback_buffers; @@ -280,7 +277,7 @@ 
private: std::size_t current_cbuf = 0; OGLBuffer unified_uniform_buffer; - /// Number of commands queued to the OpenGL driver. Reseted on flush. + /// Number of commands queued to the OpenGL driver. Resetted on flush. std::size_t num_queued_commands = 0; u32 last_clip_distance_mask = 0; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 0ebcec427..0e34a0f20 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -71,7 +71,7 @@ void OGLSampler::Create() { return; MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glGenSamplers(1, &handle); + glCreateSamplers(1, &handle); } void OGLSampler::Release() { diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp deleted file mode 100644 index 5c174879a..000000000 --- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/logging/log.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_sampler_cache.h" -#include "video_core/renderer_opengl/maxwell_to_gl.h" - -namespace OpenGL { - -SamplerCacheOpenGL::SamplerCacheOpenGL() = default; - -SamplerCacheOpenGL::~SamplerCacheOpenGL() = default; - -OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { - OGLSampler sampler; - sampler.Create(); - - const GLuint sampler_id{sampler.handle}; - glSamplerParameteri( - sampler_id, GL_TEXTURE_MAG_FILTER, - MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None)); - glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, - MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter)); - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u)); - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v)); - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p)); - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, - tsc.depth_compare_enabled == 1 ? 
GL_COMPARE_REF_TO_TEXTURE : GL_NONE); - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, - MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func)); - glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data()); - glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod()); - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod()); - glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias()); - if (GLAD_GL_ARB_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); - } else if (GLAD_GL_EXT_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); - } else { - LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); - } - - return sampler; -} - -GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const { - return sampler.handle; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h deleted file mode 100644 index 34ee37f00..000000000 --- a/src/video_core/renderer_opengl/gl_sampler_cache.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include - -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/sampler_cache.h" - -namespace OpenGL { - -class SamplerCacheOpenGL final : public VideoCommon::SamplerCache { -public: - explicit SamplerCacheOpenGL(); - ~SamplerCacheOpenGL(); - -protected: - OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; - - GLuint ToSamplerType(const OGLSampler& sampler) const override; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index eabfdea5d..d4841fdb7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -27,7 +27,6 @@ #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/renderer_opengl/utils.h" #include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ccbdfe967..2e1fa252d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode; using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using Tegra::Shader::TextureType; -using VideoCommon::Shader::BuildTransformFeedback; -using VideoCommon::Shader::Registry; -using namespace std::string_literals; using namespace VideoCommon::Shader; +using namespace std::string_literals; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Operation = const OperationNode&; @@ -2753,11 +2751,11 @@ private: } } - std::string GetSampler(const Sampler& sampler) const { + std::string GetSampler(const SamplerEntry& sampler) const 
{ return AppendSuffix(sampler.index, "sampler"); } - std::string GetImage(const Image& image) const { + std::string GetImage(const ImageEntry& image) const { return AppendSuffix(image.index, "image"); } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index c4ff47875..be68994bb 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -20,8 +20,8 @@ namespace OpenGL { class Device; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using SamplerEntry = VideoCommon::Shader::Sampler; -using ImageEntry = VideoCommon::Shader::Image; +using SamplerEntry = VideoCommon::Shader::SamplerEntry; +using ImageEntry = VideoCommon::Shader::ImageEntry; class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { public: diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 691c6c79b..553e6e8d6 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() { } } +void ProgramManager::BindHostCompute(GLuint program) { + if (use_assembly_programs) { + glDisable(GL_COMPUTE_PROGRAM_NV); + } + glUseProgram(program); + is_graphics_bound = false; +} + +void ProgramManager::RestoreGuestCompute() { + if (use_assembly_programs) { + glEnable(GL_COMPUTE_PROGRAM_NV); + glUseProgram(0); + } +} + void ProgramManager::UseVertexShader(GLuint program) { if (use_assembly_programs) { BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 950e0dfcb..ad42cce74 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -45,6 +45,12 @@ public: /// 
Rewinds BindHostPipeline state changes. void RestoreGuestPipeline(); + /// Binds an OpenGL GLSL program object unsynchronized with the guest state. + void BindHostCompute(GLuint program); + + /// Rewinds BindHostCompute state changes. + void RestoreGuestCompute(); + void UseVertexShader(GLuint program); void UseGeometryShader(GLuint program); void UseFragmentShader(GLuint program); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 45f4fc565..60e6fa39f 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} } } +void StateTracker::InvalidateStreamBuffer() { + flags[Dirty::VertexBuffers] = true; + for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { + flags[index] = true; + } +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 9d127548f..574615d3c 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -92,6 +92,8 @@ class StateTracker { public: explicit StateTracker(Tegra::GPU& gpu); + void InvalidateStreamBuffer(); + void BindIndexBuffer(GLuint new_index_buffer) { if (index_buffer == new_index_buffer) { return; @@ -100,6 +102,14 @@ public: glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer); } + void BindFramebuffer(GLuint new_framebuffer) { + if (framebuffer == new_framebuffer) { + return; + } + framebuffer = new_framebuffer; + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); + } + void NotifyScreenDrawVertexArray() { flags[OpenGL::Dirty::VertexFormats] = true; flags[OpenGL::Dirty::VertexFormat0 + 0] = true; @@ -129,9 +139,9 @@ public: flags[OpenGL::Dirty::Scissor0] = true; } - void NotifyColorMask0() { + void 
NotifyColorMask(size_t index) { flags[OpenGL::Dirty::ColorMasks] = true; - flags[OpenGL::Dirty::ColorMask0] = true; + flags[OpenGL::Dirty::ColorMask0 + index] = true; } void NotifyBlend0() { @@ -190,6 +200,7 @@ public: private: Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; + GLuint framebuffer = 0; GLuint index_buffer = 0; }; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 887995cf4..e0819cdf2 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -9,6 +9,7 @@ #include "common/assert.h" #include "common/microprofile.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", @@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) - : buffer_size(size) { +OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_) + : state_tracker{state_tracker_} { gl_buffer.Create(); - GLsizeiptr allocate_size = size; - if (vertex_data_usage) { - // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer - // read position is near the end and is an out-of-bound access to the vertex buffer. This is - // probably a bug in the driver and is related to the usage of vec3 attributes in the - // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the - // crash. 
- allocate_size *= 2; - } - static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; - glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); + glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags); mapped_ptr = static_cast( - glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); + glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); @@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() { gl_buffer.Release(); } -std::tuple OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { - ASSERT(size <= buffer_size); - ASSERT(alignment <= buffer_size); +std::pair OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { + ASSERT(size <= BUFFER_SIZE); + ASSERT(alignment <= BUFFER_SIZE); mapped_size = size; if (alignment > 0) { buffer_pos = Common::AlignUp(buffer_pos, alignment); } - bool invalidate = false; - if (buffer_pos + size > buffer_size) { + if (buffer_pos + size > BUFFER_SIZE) { MICROPROFILE_SCOPE(OpenGL_StreamBuffer); glInvalidateBufferData(gl_buffer.handle); + state_tracker.InvalidateStreamBuffer(); buffer_pos = 0; - invalidate = true; } - return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); + return std::make_pair(mapped_ptr + buffer_pos, buffer_pos); } void OGLStreamBuffer::Unmap(GLsizeiptr size) { diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 307a67113..dd9cf67eb 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -4,29 +4,31 @@ #pragma once -#include +#include + #include + #include "common/common_types.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { class Device; +class StateTracker; class 
OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); + explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_); ~OGLStreamBuffer(); /* * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes * and the optional alignment requirement. * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. - * The return values are the pointer to the new chunk, the offset within the buffer, - * and the invalidation flag for previous chunks. + * The return values are the pointer to the new chunk, and the offset within the buffer. * The actual used size must be specified on unmapping the chunk. */ - std::tuple Map(GLsizeiptr size, GLintptr alignment = 0); + std::pair Map(GLsizeiptr size, GLintptr alignment = 0); void Unmap(GLsizeiptr size); @@ -39,15 +41,18 @@ public: } GLsizeiptr Size() const noexcept { - return buffer_size; + return BUFFER_SIZE; } private: + static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024; + + StateTracker& state_tracker; + OGLBuffer gl_buffer; GLuint64EXT gpu_address = 0; GLintptr buffer_pos = 0; - GLsizeiptr buffer_size = 0; GLsizeiptr mapped_size = 0; u8* mapped_ptr = nullptr; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index daf352b50..4c690418c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -2,173 +2,238 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include "common/assert.h" -#include "common/bit_util.h" -#include "common/common_types.h" -#include "common/microprofile.h" -#include "common/scope_exit.h" -#include "core/core.h" -#include "video_core/morton.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" +#include +#include +#include +#include + +#include + +#include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/renderer_opengl/utils.h" -#include "video_core/texture_cache/surface_base.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" +#include "video_core/renderer_opengl/util_shaders.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/texture_cache.h" -#include "video_core/textures/convert.h" -#include "video_core/textures/texture.h" +#include "video_core/textures/decoders.h" namespace OpenGL { -using Tegra::Texture::SwizzleSource; -using VideoCore::MortonSwizzleMode; +namespace { +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TextureMipmapFilter; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +using VideoCommon::CalculateLevelStrideAlignment; +using VideoCommon::ImageCopy; +using VideoCommon::ImageFlagBits; +using VideoCommon::ImageType; +using VideoCommon::NUM_RT; +using VideoCommon::SamplesLog2; +using VideoCommon::SwizzleParameters; +using VideoCore::Surface::BytesPerBlock; +using VideoCore::Surface::IsPixelFormatASTC; +using VideoCore::Surface::IsPixelFormatSRGB; +using VideoCore::Surface::MaxPixelFormat; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; -MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", 
MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", - MP_RGB(128, 192, 128)); +struct CopyOrigin { + GLint level; + GLint x; + GLint y; + GLint z; +}; -namespace { +struct CopyRegion { + GLsizei width; + GLsizei height; + GLsizei depth; +}; struct FormatTuple { GLenum internal_format; GLenum format = GL_NONE; GLenum type = GL_NONE; + GLenum store_format = internal_format; }; -constexpr std::array tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM - {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM - {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM - {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM - {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM - {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // 
BC1_RGBA_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM - {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM - {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM - {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT - {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT - {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT - {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT - {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM - {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM - {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // 
ASTC_2D_8X8_UNORM - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - // Compressed sRGB formats - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT - - // Depth formats - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - - // DepthStencil formats - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, 
GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM +constexpr std::array FORMAT_TABLE = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + 
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_RGBA8}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, GL_RGBA8}, // B8G8R8A8_UNORM + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + 
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT }}; +constexpr std::array ACCELERATED_FORMATS{ + GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, + GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, + GL_RG16UI, GL_RG8UI, GL_R32UI, GL_R16UI, 
GL_R8UI, GL_RGBA32I, + GL_RGBA16I, GL_RGBA8I, GL_RG32I, GL_RG16I, GL_RG8I, GL_R32I, + GL_R16I, GL_R8I, GL_RGBA16, GL_RGB10_A2, GL_RGBA8, GL_RG16, + GL_RG8, GL_R16, GL_R8, GL_RGBA16_SNORM, GL_RGBA8_SNORM, GL_RG16_SNORM, + GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, +}; + const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { - ASSERT(static_cast(pixel_format) < tex_format_tuples.size()); - return tex_format_tuples[static_cast(pixel_format)]; + ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); + return FORMAT_TABLE[static_cast(pixel_format)]; } -GLenum GetTextureTarget(const SurfaceTarget& target) { - switch (target) { - case SurfaceTarget::TextureBuffer: +GLenum ImageTarget(const VideoCommon::ImageInfo& info) { + switch (info.type) { + case ImageType::e1D: + return GL_TEXTURE_1D_ARRAY; + case ImageType::e2D: + if (info.num_samples > 1) { + return GL_TEXTURE_2D_MULTISAMPLE_ARRAY; + } + return GL_TEXTURE_2D_ARRAY; + case ImageType::e3D: + return GL_TEXTURE_3D; + case ImageType::Linear: + return GL_TEXTURE_2D_ARRAY; + case ImageType::Buffer: return GL_TEXTURE_BUFFER; - case SurfaceTarget::Texture1D: + } + UNREACHABLE_MSG("Invalid image type={}", info.type); + return GL_NONE; +} + +GLenum ImageTarget(ImageViewType type, int num_samples = 1) { + const bool is_multisampled = num_samples > 1; + switch (type) { + case ImageViewType::e1D: return GL_TEXTURE_1D; - case SurfaceTarget::Texture2D: - return GL_TEXTURE_2D; - case SurfaceTarget::Texture3D: + case ImageViewType::e2D: + return is_multisampled ? 
GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; + case ImageViewType::Cube: + return GL_TEXTURE_CUBE_MAP; + case ImageViewType::e3D: return GL_TEXTURE_3D; - case SurfaceTarget::Texture1DArray: + case ImageViewType::e1DArray: return GL_TEXTURE_1D_ARRAY; - case SurfaceTarget::Texture2DArray: - return GL_TEXTURE_2D_ARRAY; - case SurfaceTarget::TextureCubemap: - return GL_TEXTURE_CUBE_MAP; - case SurfaceTarget::TextureCubeArray: + case ImageViewType::e2DArray: + return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; + case ImageViewType::CubeArray: return GL_TEXTURE_CUBE_MAP_ARRAY; + case ImageViewType::Rect: + return GL_TEXTURE_RECTANGLE; + case ImageViewType::Buffer: + return GL_TEXTURE_BUFFER; } - UNREACHABLE(); - return {}; + UNREACHABLE_MSG("Invalid image view type={}", type); + return GL_NONE; } -GLint GetSwizzleSource(SwizzleSource source) { +GLenum TextureMode(PixelFormat format, bool is_first) { + switch (format) { + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: + return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; + case PixelFormat::S8_UINT_D24_UNORM: + return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; + default: + UNREACHABLE(); + return GL_DEPTH_COMPONENT; + } +} + +GLint Swizzle(SwizzleSource source) { switch (source) { case SwizzleSource::Zero: return GL_ZERO; @@ -184,530 +249,813 @@ GLint GetSwizzleSource(SwizzleSource source) { case SwizzleSource::OneFloat: return GL_ONE; } - UNREACHABLE(); + UNREACHABLE_MSG("Invalid swizzle source={}", source); return GL_NONE; } -GLenum GetComponent(PixelFormat format, bool is_first) { - switch (format) { - case PixelFormat::D24_UNORM_S8_UINT: - case PixelFormat::D32_FLOAT_S8_UINT: - return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; - case PixelFormat::S8_UINT_D24_UNORM: - return is_first ? 
GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; +GLenum AttachmentType(PixelFormat format) { + switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) { + case SurfaceType::Depth: + return GL_DEPTH_ATTACHMENT; + case SurfaceType::DepthStencil: + return GL_DEPTH_STENCIL_ATTACHMENT; default: - UNREACHABLE(); - return GL_DEPTH_COMPONENT; + UNIMPLEMENTED_MSG("Unimplemented type={}", type); + return GL_NONE; } } -void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { - if (params.IsBuffer()) { - return; +[[nodiscard]] bool IsConverted(const Device& device, PixelFormat format, ImageType type) { + if (!device.HasASTC() && IsPixelFormatASTC(format)) { + return true; } - glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast(params.num_levels - 1)); - if (params.num_levels == 1) { - glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); + switch (format) { + case PixelFormat::BC4_UNORM: + case PixelFormat::BC5_UNORM: + return type == ImageType::e3D; + default: + break; } + return false; } -OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format, - OGLBuffer& texture_buffer) { - OGLTexture texture; - texture.Create(target); +[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) { + switch (value) { + case SwizzleSource::G: + return SwizzleSource::R; + default: + return value; + } +} - switch (params.target) { - case SurfaceTarget::Texture1D: - glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); - break; - case SurfaceTarget::TextureBuffer: - texture_buffer.Create(); - glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), - nullptr, 
GL_DYNAMIC_STORAGE_BIT); - glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); +void ApplySwizzle(GLuint handle, PixelFormat format, std::array swizzle) { + switch (format) { + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: + case PixelFormat::S8_UINT_D24_UNORM: + UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G); + glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, + TextureMode(format, swizzle[0] == SwizzleSource::R)); + std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); break; - case SurfaceTarget::Texture2D: - case SurfaceTarget::TextureCubemap: - glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, - params.height); + default: break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width, - params.height, params.depth); + } + std::array gl_swizzle; + std::ranges::transform(swizzle, gl_swizzle.begin(), Swizzle); + glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); +} + +[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, + const VideoCommon::ImageInfo& info) { + // Disable accelerated uploads for now as they don't implement swizzled uploads + return false; + switch (info.type) { + case ImageType::e2D: + case ImageType::e3D: + case ImageType::Linear: break; default: - UNREACHABLE(); + return false; + } + const GLenum internal_format = GetFormatTuple(info.format).internal_format; + const auto& format_info = runtime.FormatInfo(info.type, internal_format); + if (format_info.is_compressed) { + return false; + } + if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) { + return false; } + if (format_info.compatibility_by_size) { + return true; + } + const GLenum store_format = 
StoreFormat(BytesPerBlock(info.format)); + const GLenum store_class = runtime.FormatInfo(info.type, store_format).compatibility_class; + return format_info.compatibility_class == store_class; +} - ApplyTextureDefaults(params, texture.handle); +[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset, + VideoCommon::SubresourceLayers subresource, GLenum target) { + switch (target) { + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + return CopyOrigin{ + .level = static_cast(subresource.base_level), + .x = static_cast(offset.x), + .y = static_cast(offset.y), + .z = static_cast(subresource.base_layer), + }; + case GL_TEXTURE_3D: + return CopyOrigin{ + .level = static_cast(subresource.base_level), + .x = static_cast(offset.x), + .y = static_cast(offset.y), + .z = static_cast(offset.z), + }; + default: + UNIMPLEMENTED_MSG("Unimplemented copy target={}", target); + return CopyOrigin{.level = 0, .x = 0, .y = 0, .z = 0}; + } +} - return texture; +[[nodiscard]] CopyRegion MakeCopyRegion(VideoCommon::Extent3D extent, + VideoCommon::SubresourceLayers dst_subresource, + GLenum target) { + switch (target) { + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + return CopyRegion{ + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .depth = static_cast(dst_subresource.num_layers), + }; + case GL_TEXTURE_3D: + return CopyRegion{ + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .depth = static_cast(extent.depth), + }; + default: + UNIMPLEMENTED_MSG("Unimplemented copy target={}", target); + return CopyRegion{.width = 0, .height = 0, .depth = 0}; + } } -constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, - SwizzleSource w_source) { - return (static_cast(x_source) << 24) | (static_cast(y_source) << 16) | - (static_cast(z_source) << 8) | static_cast(w_source); +void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* 
image_view) { + if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { + const GLuint texture = image_view->DefaultHandle(); + glNamedFramebufferTexture(fbo, attachment, texture, 0); + return; + } + const GLuint texture = image_view->Handle(ImageViewType::e3D); + if (image_view->range.extent.layers > 1) { + // TODO: OpenGL doesn't support rendering to a fixed number of slices + glNamedFramebufferTexture(fbo, attachment, texture, 0); + } else { + const u32 slice = image_view->range.base.layer; + glNamedFramebufferTextureLayer(fbo, attachment, texture, 0, slice); + } } } // Anonymous namespace -CachedSurface::CachedSurface(const GPUVAddr gpu_addr_, const SurfaceParams& params_, - bool is_astc_supported_) - : SurfaceBase{gpu_addr_, params_, is_astc_supported_} { - if (is_converted) { - internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8; - format = GL_RGBA; - type = GL_UNSIGNED_BYTE; - } else { - const auto& tuple{GetFormatTuple(params.pixel_format)}; - internal_format = tuple.internal_format; - format = tuple.format; - type = tuple.type; - is_compressed = params.IsCompressed(); - } - target = GetTextureTarget(params.target); - texture = CreateTexture(params, target, internal_format, texture_buffer); - DecorateSurfaceName(); +ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_) + : span(map, size), sync{sync_}, handle{handle_} {} - u32 num_layers = 1; - if (params.is_layered || params.target == SurfaceTarget::Texture3D) { - num_layers = params.depth; +ImageBufferMap::~ImageBufferMap() { + if (sync) { + sync->Create(); } - - main_view = - CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true); } -CachedSurface::~CachedSurface() = default; +TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, + StateTracker& state_tracker_) + : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) { + static constexpr 
std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; + for (size_t i = 0; i < TARGETS.size(); ++i) { + const GLenum target = TARGETS[i]; + for (const FormatTuple& tuple : FORMAT_TABLE) { + const GLenum format = tuple.internal_format; + GLint compat_class; + GLint compat_type; + GLint is_compressed; + glGetInternalformativ(target, format, GL_IMAGE_COMPATIBILITY_CLASS, 1, &compat_class); + glGetInternalformativ(target, format, GL_IMAGE_FORMAT_COMPATIBILITY_TYPE, 1, + &compat_type); + glGetInternalformativ(target, format, GL_TEXTURE_COMPRESSED, 1, &is_compressed); + const FormatProperties properties{ + .compatibility_class = static_cast(compat_class), + .compatibility_by_size = compat_type == GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE, + .is_compressed = is_compressed == GL_TRUE, + }; + format_properties[i].emplace(format, properties); + } + } + null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); + null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); + null_image_3d.Create(GL_TEXTURE_3D); + null_image_rect.Create(GL_TEXTURE_RECTANGLE); + glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); + glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); + glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); + glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); + + std::array new_handles; + glGenTextures(static_cast(new_handles.size()), new_handles.data()); + null_image_view_1d.handle = new_handles[0]; + null_image_view_2d.handle = new_handles[1]; + null_image_view_2d_array.handle = new_handles[2]; + null_image_view_cube.handle = new_handles[3]; + glTextureView(null_image_view_1d.handle, GL_TEXTURE_1D, null_image_1d_array.handle, GL_R8, 0, 1, + 0, 1); + glTextureView(null_image_view_2d.handle, GL_TEXTURE_2D, null_image_cube_array.handle, GL_R8, 0, + 1, 0, 1); + glTextureView(null_image_view_2d_array.handle, GL_TEXTURE_2D_ARRAY, + null_image_cube_array.handle, GL_R8, 0, 1, 0, 1); + 
glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, + GL_R8, 0, 1, 0, 6); + const std::array texture_handles{ + null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, + null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, + null_image_view_2d_array.handle, null_image_view_cube.handle, + }; + for (const GLuint handle : texture_handles) { + static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; + glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); + } + const auto set_view = [this](ImageViewType type, GLuint handle) { + if (device.HasDebuggingToolAttached()) { + const std::string name = fmt::format("NullImage {}", type); + glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); + } + null_image_views[static_cast(type)] = handle; + }; + set_view(ImageViewType::e1D, null_image_view_1d.handle); + set_view(ImageViewType::e2D, null_image_view_2d.handle); + set_view(ImageViewType::Cube, null_image_view_cube.handle); + set_view(ImageViewType::e3D, null_image_3d.handle); + set_view(ImageViewType::e1DArray, null_image_1d_array.handle); + set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); + set_view(ImageViewType::CubeArray, null_image_cube_array.handle); + set_view(ImageViewType::Rect, null_image_rect.handle); +} -void CachedSurface::DownloadTexture(std::vector& staging_buffer) { - MICROPROFILE_SCOPE(OpenGL_Texture_Download); +TextureCacheRuntime::~TextureCacheRuntime() = default; - if (params.IsBuffer()) { - glGetNamedBufferSubData(texture_buffer.handle, 0, - static_cast(params.GetHostSizeInBytes(false)), - staging_buffer.data()); - return; - } +void TextureCacheRuntime::Finish() { + glFinish(); +} - SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); +ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { + return upload_buffers.RequestMap(size, true); +} - for (u32 level = 0; level < 
params.emulated_levels; ++level) { - glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); - glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); +ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { + return download_buffers.RequestMap(size, false); +} - u8* const mip_data = staging_buffer.data() + mip_offset; - const GLsizei size = static_cast(params.GetHostMipmapSize(level)); - if (is_compressed) { - glGetCompressedTextureImage(texture.handle, level, size, mip_data); - } else { - glGetTextureImage(texture.handle, level, format, type, size, mip_data); - } +void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, + std::span copies) { + const GLuint dst_name = dst_image.Handle(); + const GLuint src_name = src_image.Handle(); + const GLenum dst_target = ImageTarget(dst_image.info); + const GLenum src_target = ImageTarget(src_image.info); + for (const ImageCopy& copy : copies) { + const auto src_origin = MakeCopyOrigin(copy.src_offset, copy.src_subresource, src_target); + const auto dst_origin = MakeCopyOrigin(copy.dst_offset, copy.dst_subresource, dst_target); + const auto region = MakeCopyRegion(copy.extent, copy.dst_subresource, dst_target); + glCopyImageSubData(src_name, src_target, src_origin.level, src_origin.x, src_origin.y, + src_origin.z, dst_name, dst_target, dst_origin.level, dst_origin.x, + dst_origin.y, dst_origin.z, region.width, region.height, region.depth); } } -void CachedSurface::UploadTexture(const std::vector& staging_buffer) { - MICROPROFILE_SCOPE(OpenGL_Texture_Upload); - SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.emulated_levels; ++level) { - UploadTextureMipmap(level, staging_buffer); +bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { + if (dst.info.type == ImageType::e3D && 
dst.info.format == PixelFormat::BC4_UNORM) { + return false; } + return true; } -void CachedSurface::UploadTextureMipmap(u32 level, const std::vector& staging_buffer) { - glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); - - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); - const u8* buffer{staging_buffer.data() + mip_offset}; - if (is_compressed) { - const auto image_size{static_cast(params.GetHostMipmapSize(level))}; - switch (params.target) { - case SurfaceTarget::Texture2D: - glCompressedTextureSubImage2D(texture.handle, level, 0, 0, - static_cast(params.GetMipWidth(level)), - static_cast(params.GetMipHeight(level)), - internal_format, image_size, buffer); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0, - static_cast(params.GetMipWidth(level)), - static_cast(params.GetMipHeight(level)), - static_cast(params.GetMipDepth(level)), - internal_format, image_size, buffer); - break; - case SurfaceTarget::TextureCubemap: { - const std::size_t host_layer_size{params.GetHostLayerSize(level)}; - for (std::size_t face = 0; face < params.depth; ++face) { - glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), - static_cast(params.GetMipWidth(level)), - static_cast(params.GetMipHeight(level)), 1, - internal_format, - static_cast(host_layer_size), buffer); - buffer += host_layer_size; - } - break; - } - default: - UNREACHABLE(); - } +void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, + std::span copies) { + if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { + ASSERT(src.info.type == ImageType::e3D); + util_shaders.CopyBC4(dst, src, copies); } else { - switch (params.target) { - case SurfaceTarget::Texture1D: - 
glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type, - buffer); - break; - case SurfaceTarget::TextureBuffer: - ASSERT(level == 0); - glNamedBufferSubData(texture_buffer.handle, 0, - params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer); - break; - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2D: - glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level), - params.GetMipHeight(level), format, type, buffer); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureSubImage3D( - texture.handle, level, 0, 0, 0, static_cast(params.GetMipWidth(level)), - static_cast(params.GetMipHeight(level)), - static_cast(params.GetMipDepth(level)), format, type, buffer); - break; - case SurfaceTarget::TextureCubemap: - for (std::size_t face = 0; face < params.depth; ++face) { - glTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), - params.GetMipWidth(level), params.GetMipHeight(level), 1, - format, type, buffer); - buffer += params.GetHostLayerSize(level); - } - break; - default: - UNREACHABLE(); - } + UNREACHABLE(); } } -void CachedSurface::DecorateSurfaceName() { - LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); -} +void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation) { + state_tracker.NotifyScissor0(); + state_tracker.NotifyRasterizeEnable(); + state_tracker.NotifyFramebufferSRGB(); -void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) { - LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); + ASSERT(dst->BufferBits() == src->BufferBits()); + + glEnable(GL_FRAMEBUFFER_SRGB); + glDisable(GL_RASTERIZER_DISCARD); + glDisablei(GL_SCISSOR_TEST, 0); + + const GLbitfield 
buffer_bits = dst->BufferBits(); + const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0; + const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear; + glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y, + src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y, + dst_region[1].x, dst_region[1].y, buffer_bits, + is_linear ? GL_LINEAR : GL_NEAREST); } -View CachedSurface::CreateView(const ViewParams& view_key) { - return CreateViewInner(view_key, false); +void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, + size_t buffer_offset, + std::span swizzles) { + switch (image.info.type) { + case ImageType::e2D: + return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); + case ImageType::e3D: + return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); + case ImageType::Linear: + return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); + default: + UNREACHABLE(); + break; + } } -View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) { - auto view = std::make_shared(*this, view_key, is_proxy); - views[view_key] = view; - if (!is_proxy) - view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); - return view; +void TextureCacheRuntime::InsertUploadMemoryBarrier() { + glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); } -CachedSurfaceView::CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, - bool is_proxy_) - : ViewBase{params_}, surface{surface_}, format{surface_.internal_format}, - target{GetTextureTarget(params_.target)}, is_proxy{is_proxy_} { - if (!is_proxy_) { - main_view = CreateTextureView(); +FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal_format) const { + switch (type) { + case ImageType::e1D: + return 
format_properties[0].at(internal_format); + case ImageType::e2D: + case ImageType::Linear: + return format_properties[1].at(internal_format); + case ImageType::e3D: + return format_properties[2].at(internal_format); + default: + UNREACHABLE(); + return FormatProperties{}; } } -CachedSurfaceView::~CachedSurfaceView() = default; +TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) + : storage_flags{storage_flags_}, map_flags{map_flags_} {} -void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const { - ASSERT(params.num_levels == 1); +TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default; - if (params.target == SurfaceTarget::Texture3D) { - if (params.num_layers > 1) { - ASSERT(params.base_layer == 0); - glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level); - } else { - glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle, - params.base_level, params.base_layer); - } - return; +ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size, + bool insert_fence) { + const size_t index = RequestBuffer(requested_size); + OGLSync* const sync = insert_fence ? 
&syncs[index] : nullptr; + return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync); +} + +size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { + if (const std::optional index = FindBuffer(requested_size); index) { + return *index; } - if (params.num_layers > 1) { - UNIMPLEMENTED_IF(params.base_layer != 0); - glFramebufferTexture(fb_target, attachment, GetTexture(), 0); - return; + OGLBuffer& buffer = buffers.emplace_back(); + buffer.Create(); + glNamedBufferStorage(buffer.handle, requested_size, nullptr, + storage_flags | GL_MAP_PERSISTENT_BIT); + maps.push_back(static_cast(glMapNamedBufferRange(buffer.handle, 0, requested_size, + map_flags | GL_MAP_PERSISTENT_BIT))); + + syncs.emplace_back(); + sizes.push_back(requested_size); + + ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && + maps.size() == sizes.size()); + + return buffers.size() - 1; +} + +std::optional TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) { + size_t smallest_buffer = std::numeric_limits::max(); + std::optional found; + const size_t num_buffers = sizes.size(); + for (size_t index = 0; index < num_buffers; ++index) { + const size_t buffer_size = sizes[index]; + if (buffer_size < requested_size || buffer_size >= smallest_buffer) { + continue; + } + if (syncs[index].handle != 0) { + GLint status; + glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status); + if (status != GL_SIGNALED) { + continue; + } + syncs[index].Release(); + } + smallest_buffer = buffer_size; + found = index; } + return found; +} - const GLenum view_target = surface.GetTarget(); - const GLuint texture = surface.GetTexture(); - switch (surface.GetSurfaceParams().target) { - case SurfaceTarget::Texture1D: - glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level); +Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, + VAddr cpu_addr_) + : 
VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) { + if (CanBeAccelerated(runtime, info)) { + flags |= ImageFlagBits::AcceleratedUpload; + } + if (IsConverted(runtime.device, info.format, info.type)) { + flags |= ImageFlagBits::Converted; + gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; + gl_store_format = GL_RGBA8; + gl_format = GL_RGBA; + gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; + } else { + const auto& tuple = GetFormatTuple(info.format); + gl_internal_format = tuple.internal_format; + gl_store_format = tuple.store_format; + gl_format = tuple.format; + gl_type = tuple.type; + } + const GLenum target = ImageTarget(info); + const GLsizei width = info.size.width; + const GLsizei height = info.size.height; + const GLsizei depth = info.size.depth; + const int max_host_mip_levels = std::bit_width(info.size.width); + const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); + const GLsizei num_layers = info.resources.layers; + const GLsizei num_samples = info.num_samples; + + GLuint handle = 0; + if (target != GL_TEXTURE_BUFFER) { + texture.Create(target); + handle = texture.handle; + } + switch (target) { + case GL_TEXTURE_1D_ARRAY: + glTextureStorage2D(handle, num_levels, gl_store_format, width, num_layers); break; - case SurfaceTarget::Texture2D: - glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level); + case GL_TEXTURE_2D_ARRAY: + glTextureStorage3D(handle, num_levels, gl_store_format, width, height, num_layers); break; - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: - glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level, - params.base_layer); + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { + // TODO: Where should 'fixedsamplelocations' come from? 
+ const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); + glTextureStorage3DMultisample(handle, num_samples, gl_store_format, width >> samples_x, + height >> samples_y, num_layers, GL_FALSE); + break; + } + case GL_TEXTURE_RECTANGLE: + glTextureStorage2D(handle, num_levels, gl_store_format, width, height); + break; + case GL_TEXTURE_3D: + glTextureStorage3D(handle, num_levels, gl_store_format, width, height, depth); + break; + case GL_TEXTURE_BUFFER: + buffer.Create(); + glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); break; default: - UNIMPLEMENTED(); + UNREACHABLE_MSG("Invalid target=0x{:x}", target); + break; + } + if (runtime.device.HasDebuggingToolAttached()) { + const std::string name = VideoCommon::Name(*this); + glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle, + static_cast(name.size()), name.data()); } } -GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source, - SwizzleSource z_source, SwizzleSource w_source) { - if (GetSurfaceParams().IsBuffer()) { - return GetTexture(); - } - const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); - if (current_swizzle == new_swizzle) { - return current_view; - } - current_swizzle = new_swizzle; +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle()); + glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); - const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); - OGLTextureView& view = entry->second; - if (!is_cache_miss) { - current_view = view.handle; - return view.handle; - } - view = CreateTextureView(); - current_view = view.handle; + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - std::array swizzle{x_source, y_source, z_source, w_source}; + u32 current_row_length = std::numeric_limits::max(); + u32 current_image_height = std::numeric_limits::max(); - switch (const 
PixelFormat pixel_format = GetSurfaceParams().pixel_format) { - case PixelFormat::D24_UNORM_S8_UINT: - case PixelFormat::D32_FLOAT_S8_UINT: - case PixelFormat::S8_UINT_D24_UNORM: - UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); - glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, - GetComponent(pixel_format, x_source == SwizzleSource::R)); - - // Make sure we sample the first component - std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) { - return value == SwizzleSource::G ? SwizzleSource::R : value; - }); - [[fallthrough]]; - default: { - const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]), - GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])}; - glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); - break; - } + for (const VideoCommon::BufferImageCopy& copy : copies) { + if (current_row_length != copy.buffer_row_length) { + current_row_length = copy.buffer_row_length; + glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length); + } + if (current_image_height != copy.buffer_image_height) { + current_image_height = copy.buffer_image_height; + glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); + } + CopyBufferToImage(copy, buffer_offset); } - return view.handle; } -OGLTextureView CachedSurfaceView::CreateTextureView() const { - OGLTextureView texture_view; - texture_view.Create(); - - if (target == GL_TEXTURE_3D) { - glTextureView(texture_view.handle, target, surface.texture.handle, format, - params.base_level, params.num_levels, 0, 1); - } else { - glTextureView(texture_view.handle, target, surface.texture.handle, format, - params.base_level, params.num_levels, params.base_layer, params.num_layers); +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + for (const VideoCommon::BufferCopy& copy : copies) { + glCopyNamedBufferSubData(map.Handle(), buffer.handle, 
copy.src_offset + buffer_offset, + copy.dst_offset, copy.size); } - ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); - - return texture_view; } -TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, - StateTracker& state_tracker_) - : TextureCacheBase{rasterizer_, maxwell3d_, gpu_memory_, device_.HasASTC()}, - state_tracker{state_tracker_} { - src_framebuffer.Create(); - dst_framebuffer.Create(); -} +void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API -TextureCacheOpenGL::~TextureCacheOpenGL() = default; + glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle()); + glPixelStorei(GL_PACK_ALIGNMENT, 1); -Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { - return std::make_shared(gpu_addr, params, is_astc_supported); -} + u32 current_row_length = std::numeric_limits::max(); + u32 current_image_height = std::numeric_limits::max(); -void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, - const VideoCommon::CopyParams& copy_params) { - const auto& src_params = src_surface->GetSurfaceParams(); - const auto& dst_params = dst_surface->GetSurfaceParams(); - if (src_params.type != dst_params.type) { - // A fallback is needed - return; + for (const VideoCommon::BufferImageCopy& copy : copies) { + if (current_row_length != copy.buffer_row_length) { + current_row_length = copy.buffer_row_length; + glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); + } + if (current_image_height != copy.buffer_image_height) { + current_image_height = copy.buffer_image_height; + glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); + } + CopyImageToBuffer(copy, buffer_offset); } - const auto src_handle = src_surface->GetTexture(); - const auto src_target = 
src_surface->GetTarget(); - const auto dst_handle = dst_surface->GetTexture(); - const auto dst_target = dst_surface->GetTarget(); - glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x, - copy_params.source_y, copy_params.source_z, dst_handle, dst_target, - copy_params.dest_level, copy_params.dest_x, copy_params.dest_y, - copy_params.dest_z, copy_params.width, copy_params.height, - copy_params.depth); } -void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) { - const auto& src_params{src_view->GetSurfaceParams()}; - const auto& dst_params{dst_view->GetSurfaceParams()}; - UNIMPLEMENTED_IF(src_params.depth != 1); - UNIMPLEMENTED_IF(dst_params.depth != 1); - - state_tracker.NotifyScissor0(); - state_tracker.NotifyFramebuffer(); - state_tracker.NotifyRasterizeEnable(); - state_tracker.NotifyFramebufferSRGB(); +void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { + // Compressed formats don't have a pixel format or type + const bool is_compressed = gl_format == GL_NONE; + const void* const offset = reinterpret_cast(copy.buffer_offset + buffer_offset); - if (dst_params.srgb_conversion) { - glEnable(GL_FRAMEBUFFER_SRGB); - } else { - glDisable(GL_FRAMEBUFFER_SRGB); + switch (info.type) { + case ImageType::e1D: + if (is_compressed) { + glCompressedTextureSubImage2D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_subresource.base_layer, + copy.image_extent.width, + copy.image_subresource.num_layers, gl_internal_format, + static_cast(copy.buffer_size), offset); + } else { + glTextureSubImage2D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_subresource.base_layer, + copy.image_extent.width, copy.image_subresource.num_layers, + gl_format, gl_type, offset); + } + break; + case ImageType::e2D: + case ImageType::Linear: + if (is_compressed) { + 
glCompressedTextureSubImage3D( + texture.handle, copy.image_subresource.base_level, copy.image_offset.x, + copy.image_offset.y, copy.image_subresource.base_layer, copy.image_extent.width, + copy.image_extent.height, copy.image_subresource.num_layers, gl_internal_format, + static_cast(copy.buffer_size), offset); + } else { + glTextureSubImage3D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_offset.y, + copy.image_subresource.base_layer, copy.image_extent.width, + copy.image_extent.height, copy.image_subresource.num_layers, + gl_format, gl_type, offset); + } + break; + case ImageType::e3D: + if (is_compressed) { + glCompressedTextureSubImage3D( + texture.handle, copy.image_subresource.base_level, copy.image_offset.x, + copy.image_offset.y, copy.image_offset.z, copy.image_extent.width, + copy.image_extent.height, copy.image_extent.depth, gl_internal_format, + static_cast(copy.buffer_size), offset); + } else { + glTextureSubImage3D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_offset.y, copy.image_offset.z, + copy.image_extent.width, copy.image_extent.height, + copy.image_extent.depth, gl_format, gl_type, offset); + } + break; + default: + UNREACHABLE(); } - glDisable(GL_RASTERIZER_DISCARD); - glDisablei(GL_SCISSOR_TEST, 0); - - glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle); - - GLenum buffers = 0; - if (src_params.type == SurfaceType::ColorTexture) { - src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - buffers = GL_COLOR_BUFFER_BIT; - } else if (src_params.type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, 
GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); +} - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); +void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { + const GLint x_offset = copy.image_offset.x; + const GLsizei width = copy.image_extent.width; - buffers = GL_DEPTH_BUFFER_BIT; - } else if (src_params.type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER); + const GLint level = copy.image_subresource.base_level; + const GLsizei buffer_size = static_cast(copy.buffer_size); + void* const offset = reinterpret_cast(copy.buffer_offset + buffer_offset); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER); + GLint y_offset = 0; + GLint z_offset = 0; + GLsizei height = 1; + GLsizei depth = 1; - buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + switch (info.type) { + case ImageType::e1D: + y_offset = copy.image_subresource.base_layer; + height = copy.image_subresource.num_layers; + break; + case ImageType::e2D: + case ImageType::Linear: + y_offset = copy.image_offset.y; + z_offset = copy.image_subresource.base_layer; + height = copy.image_extent.height; + depth = copy.image_subresource.num_layers; + break; + case ImageType::e3D: + y_offset = copy.image_offset.y; + z_offset = copy.image_offset.z; + height = copy.image_extent.height; + depth = copy.image_extent.depth; + break; + default: + UNREACHABLE(); + } + // Compressed formats don't have a 
pixel format or type + const bool is_compressed = gl_format == GL_NONE; + if (is_compressed) { + glGetCompressedTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, + height, depth, buffer_size, offset); + } else { + glGetTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, height, + depth, gl_format, gl_type, buffer_size, offset); } - - const Common::Rectangle& src_rect = copy_config.src_rect; - const Common::Rectangle& dst_rect = copy_config.dst_rect; - const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - - glBlitFramebuffer(static_cast(src_rect.left), static_cast(src_rect.top), - static_cast(src_rect.right), static_cast(src_rect.bottom), - static_cast(dst_rect.left), static_cast(dst_rect.top), - static_cast(dst_rect.right), static_cast(dst_rect.bottom), - buffers, - is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST); } -void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { - MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); - const auto& src_params = src_surface->GetSurfaceParams(); - const auto& dst_params = dst_surface->GetSurfaceParams(); - UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, + ImageId image_id_, Image& image) + : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { + const Device& device = runtime.device; + if (True(image.flags & ImageFlagBits::Converted)) { + internal_format = IsPixelFormatSRGB(info.format) ? 
GL_SRGB8_ALPHA8 : GL_RGBA8; + } else { + internal_format = GetFormatTuple(format).internal_format; + } + VideoCommon::SubresourceRange flatten_range = info.range; + std::array handles; + stored_views.reserve(2); - const auto source_format = GetFormatTuple(src_params.pixel_format); - const auto dest_format = GetFormatTuple(dst_params.pixel_format); + switch (info.type) { + case ImageViewType::e1DArray: + flatten_range.extent.layers = 1; + [[fallthrough]]; + case ImageViewType::e1D: + glGenTextures(2, handles.data()); + SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); + SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); + break; + case ImageViewType::e2DArray: + flatten_range.extent.layers = 1; + [[fallthrough]]; + case ImageViewType::e2D: + if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { + // 2D and 2D array views on a 3D textures are used exclusively for render targets + ASSERT(info.range.extent.levels == 1); + const VideoCommon::SubresourceRange slice_range{ + .base = {.level = info.range.base.level, .layer = 0}, + .extent = {.levels = 1, .layers = 1}, + }; + glGenTextures(1, handles.data()); + SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); + break; + } + glGenTextures(2, handles.data()); + SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); + SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); + break; + case ImageViewType::e3D: + glGenTextures(1, handles.data()); + SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); + break; + case ImageViewType::CubeArray: + flatten_range.extent.layers = 6; + [[fallthrough]]; + case ImageViewType::Cube: + glGenTextures(2, handles.data()); + SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); + SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); + break; + case ImageViewType::Rect: + 
glGenTextures(1, handles.data()); + SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); + break; + case ImageViewType::Buffer: + glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); + SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); + break; + } + default_handle = Handle(info.type); +} - const std::size_t source_size = src_surface->GetHostSizeInBytes(); - const std::size_t dest_size = dst_surface->GetHostSizeInBytes(); +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) + : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} - const std::size_t buffer_size = std::max(source_size, dest_size); +void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, + GLuint handle, const VideoCommon::ImageViewInfo& info, + VideoCommon::SubresourceRange view_range) { + if (info.type == ImageViewType::Buffer) { + // TODO: Take offset from buffer cache + glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, + image.guest_size_bytes); + } else { + const GLuint parent = image.texture.handle; + const GLenum target = ImageTarget(view_type, image.info.num_samples); + glTextureView(handle, target, parent, internal_format, view_range.base.level, + view_range.extent.levels, view_range.base.layer, view_range.extent.layers); + if (!info.IsRenderTarget()) { + ApplySwizzle(handle, format, info.Swizzle()); + } + } + if (device.HasDebuggingToolAttached()) { + const std::string name = VideoCommon::Name(*this, view_type); + glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); + } + stored_views.emplace_back().handle = handle; + views[static_cast(view_type)] = handle; +} - GLuint copy_pbo_handle = FetchPBO(buffer_size); +Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { + const GLenum compare_mode = config.depth_compare_enabled ? 
GL_COMPARE_REF_TO_TEXTURE : GL_NONE; + const GLenum compare_func = MaxwellToGL::DepthCompareFunc(config.depth_compare_func); + const GLenum mag = MaxwellToGL::TextureFilterMode(config.mag_filter, TextureMipmapFilter::None); + const GLenum min = MaxwellToGL::TextureFilterMode(config.min_filter, config.mipmap_filter); + const GLenum reduction_filter = MaxwellToGL::ReductionFilter(config.reduction_filter); + const GLint seamless = config.cubemap_interface_filtering ? GL_TRUE : GL_FALSE; + + UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1); + UNIMPLEMENTED_IF(config.float_coord_normalization != 0); + + sampler.Create(); + const GLuint handle = sampler.handle; + glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u)); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v)); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p)); + glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode); + glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func); + glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag); + glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min); + glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias()); + glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod()); + glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod()); + glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); + + if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { + glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy()); + } else { + LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); + } + if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) { + glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter); + } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) { + LOG_WARNING(Render_OpenGL, 
"GL_ARB_texture_filter_minmax is required"); + } + if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) { + glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless); + } else if (seamless == GL_FALSE) { + // We default to false because it's more common + LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required"); + } +} - glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); +Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { + // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of + // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared + // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with + // mismatching size, this is why core framebuffers are preferred. + GLuint handle; + glGenFramebuffers(1, &handle); + glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); + + GLsizei num_buffers = 0; + std::array gl_draw_buffers; + gl_draw_buffers.fill(GL_NONE); + + for (size_t index = 0; index < color_buffers.size(); ++index) { + const ImageView* const image_view = color_buffers[index]; + if (!image_view) { + continue; + } + buffer_bits |= GL_COLOR_BUFFER_BIT; + gl_draw_buffers[index] = GL_COLOR_ATTACHMENT0 + key.draw_buffers[index]; + num_buffers = static_cast(index + 1); - if (src_surface->IsCompressed()) { - glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast(source_size), - nullptr); - } else { - glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type, - static_cast(source_size), nullptr); + const GLenum attachment = static_cast(GL_COLOR_ATTACHMENT0 + index); + AttachTexture(handle, attachment, image_view); } - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); + if (const ImageView* const image_view = 
depth_buffer; image_view) { + if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) { + buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + } else { + buffer_bits |= GL_DEPTH_BUFFER_BIT; + } + const GLenum attachment = AttachmentType(image_view->format); + AttachTexture(handle, attachment, image_view); + } - const GLsizei width = static_cast(dst_params.width); - const GLsizei height = static_cast(dst_params.height); - const GLsizei depth = static_cast(dst_params.depth); - if (dst_surface->IsCompressed()) { - LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); - UNREACHABLE(); + if (num_buffers > 1) { + glNamedFramebufferDrawBuffers(handle, num_buffers, gl_draw_buffers.data()); + } else if (num_buffers > 0) { + glNamedFramebufferDrawBuffer(handle, gl_draw_buffers[0]); } else { - switch (dst_params.target) { - case SurfaceTarget::Texture1D: - glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format, - dest_format.type, nullptr); - break; - case SurfaceTarget::Texture2D: - glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height, - dest_format.format, dest_format.type, nullptr); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, - dest_format.format, dest_format.type, nullptr); - break; - case SurfaceTarget::TextureCubemap: - glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, - dest_format.format, dest_format.type, nullptr); - break; - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", dst_params.target); - UNREACHABLE(); - } + glNamedFramebufferDrawBuffer(handle, GL_NONE); } - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glTextureBarrier(); -} + glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_WIDTH, key.size.width); + glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_HEIGHT, 
key.size.height); + // TODO + // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_LAYERS, ...); + // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_SAMPLES, ...); + // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS, ...); -GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { - ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; }); - const u32 l2 = Common::Log2Ceil64(static_cast(buffer_size)); - OGLBuffer& cp = copy_pbo_cache[l2]; - if (cp.handle == 0) { - const std::size_t ceil_size = 1ULL << l2; - cp.Create(); - cp.MakeStreamCopy(ceil_size); + if (runtime.device.HasDebuggingToolAttached()) { + const std::string name = VideoCommon::Name(key); + glObjectLabel(GL_FRAMEBUFFER, handle, static_cast(name.size()), name.data()); } - return cp.handle; + framebuffer.handle = handle; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 72b284fab..04193e31e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -4,157 +4,247 @@ #pragma once -#include -#include #include -#include -#include -#include +#include #include -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/texture_cache.h" namespace OpenGL { -using VideoCommon::SurfaceParams; -using VideoCommon::ViewParams; - -class CachedSurfaceView; -class CachedSurface; -class TextureCacheOpenGL; +class Device; +class ProgramManager; class StateTracker; -using Surface = std::shared_ptr; -using View = std::shared_ptr; -using TextureCacheBase = VideoCommon::TextureCache; +class Framebuffer; +class Image; +class ImageView; +class Sampler; -class CachedSurface final : public 
VideoCommon::SurfaceBase { - friend CachedSurfaceView; +using VideoCommon::ImageId; +using VideoCommon::ImageViewId; +using VideoCommon::ImageViewType; +using VideoCommon::NUM_RT; +using VideoCommon::Offset2D; +using VideoCommon::RenderTargets; +class ImageBufferMap { public: - explicit CachedSurface(GPUVAddr gpu_addr_, const SurfaceParams& params_, - bool is_astc_supported_); - ~CachedSurface(); - - void UploadTexture(const std::vector& staging_buffer) override; - void DownloadTexture(std::vector& staging_buffer) override; + explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync); + ~ImageBufferMap(); - GLenum GetTarget() const { - return target; + GLuint Handle() const noexcept { + return handle; } - GLuint GetTexture() const { - return texture.handle; + std::span Span() const noexcept { + return span; } - bool IsCompressed() const { - return is_compressed; +private: + std::span span; + OGLSync* sync; + GLuint handle; +}; + +struct FormatProperties { + GLenum compatibility_class; + bool compatibility_by_size; + bool is_compressed; +}; + +class TextureCacheRuntime { + friend Framebuffer; + friend Image; + friend ImageView; + friend Sampler; + +public: + explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, + StateTracker& state_tracker); + ~TextureCacheRuntime(); + + void Finish(); + + ImageBufferMap MapUploadBuffer(size_t size); + + ImageBufferMap MapDownloadBuffer(size_t size); + + void CopyImage(Image& dst, Image& src, std::span copies); + + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { + UNIMPLEMENTED(); } -protected: - void DecorateSurfaceName() override; + bool CanImageBeCopied(const Image& dst, const Image& src); + + void EmulateCopyImage(Image& dst, Image& src, std::span copies); + + void BlitFramebuffer(Framebuffer* dst, Framebuffer* src, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + 
Tegra::Engines::Fermi2D::Operation operation); - View CreateView(const ViewParams& view_key) override; - View CreateViewInner(const ViewParams& view_key, bool is_proxy); + void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles); + + void InsertUploadMemoryBarrier(); + + FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; private: - void UploadTextureMipmap(u32 level, const std::vector& staging_buffer); + struct StagingBuffers { + explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); + ~StagingBuffers(); - GLenum internal_format{}; - GLenum format{}; - GLenum type{}; - bool is_compressed{}; - GLenum target{}; - u32 view_count{}; + ImageBufferMap RequestMap(size_t requested_size, bool insert_fence); - OGLTexture texture; - OGLBuffer texture_buffer; + size_t RequestBuffer(size_t requested_size); + + std::optional FindBuffer(size_t requested_size); + + std::vector syncs; + std::vector buffers; + std::vector maps; + std::vector sizes; + GLenum storage_flags; + GLenum map_flags; + }; + + const Device& device; + StateTracker& state_tracker; + UtilShaders util_shaders; + + std::array, 3> format_properties; + + StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; + StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT}; + + OGLTexture null_image_1d_array; + OGLTexture null_image_cube_array; + OGLTexture null_image_3d; + OGLTexture null_image_rect; + OGLTextureView null_image_view_1d; + OGLTextureView null_image_view_2d; + OGLTextureView null_image_view_2d_array; + OGLTextureView null_image_view_cube; + + std::array null_image_views; }; -class CachedSurfaceView final : public VideoCommon::ViewBase { +class Image : public VideoCommon::ImageBase { + friend ImageView; + public: - explicit CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, bool is_proxy_); - ~CachedSurfaceView(); + explicit 
Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, + VAddr cpu_addr); - /// @brief Attaches this texture view to the currently bound fb_target framebuffer - /// @param attachment Attachment to bind textures to - /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER) - void Attach(GLenum attachment, GLenum fb_target) const; + void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies); - GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); + void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies); - void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix); + void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, + std::span copies); - void MarkAsModified(u64 tick) { - surface.MarkAsModified(true, tick); + GLuint Handle() const noexcept { + return texture.handle; } - GLuint GetTexture() const { - if (is_proxy) { - return surface.GetTexture(); - } - return main_view.handle; +private: + void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); + + void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); + + OGLTexture texture; + OGLTextureView store_view; + OGLBuffer buffer; + GLenum gl_internal_format = GL_NONE; + GLenum gl_store_format = GL_NONE; + GLenum gl_format = GL_NONE; + GLenum gl_type = GL_NONE; +}; + +class ImageView : public VideoCommon::ImageViewBase { + friend Image; + +public: + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); + + [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { + return views[static_cast(query_type)]; } - GLenum GetFormat() const { - return format; + [[nodiscard]] GLuint 
DefaultHandle() const noexcept { + return default_handle; } - const SurfaceParams& GetSurfaceParams() const { - return surface.GetSurfaceParams(); + [[nodiscard]] GLenum Format() const noexcept { + return internal_format; } private: - OGLTextureView CreateTextureView() const; + void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, + const VideoCommon::ImageViewInfo& info, + VideoCommon::SubresourceRange view_range); + + std::array views{}; + std::vector stored_views; + GLuint default_handle = 0; + GLenum internal_format = GL_NONE; +}; + +class ImageAlloc : public VideoCommon::ImageAllocBase {}; - CachedSurface& surface; - const GLenum format; - const GLenum target; - const bool is_proxy; +class Sampler { +public: + explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); - std::unordered_map view_cache; - OGLTextureView main_view; + GLuint Handle() const noexcept { + return sampler.handle; + } - // Use an invalid default so it always fails the comparison test - u32 current_swizzle = 0xffffffff; - GLuint current_view = 0; +private: + OGLSampler sampler; }; -class TextureCacheOpenGL final : public TextureCacheBase { +class Framebuffer { public: - explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, - StateTracker& state_tracker); - ~TextureCacheOpenGL(); - -protected: - Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; - - void ImageCopy(Surface& src_surface, Surface& dst_surface, - const VideoCommon::CopyParams& copy_params) override; + explicit Framebuffer(TextureCacheRuntime&, std::span color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key); - void ImageBlit(View& src_view, View& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) override; + [[nodiscard]] GLuint Handle() const noexcept { + return framebuffer.handle; + } - 
void BufferCopy(Surface& src_surface, Surface& dst_surface) override; + [[nodiscard]] GLbitfield BufferBits() const noexcept { + return buffer_bits; + } private: - GLuint FetchPBO(std::size_t buffer_size); - - StateTracker& state_tracker; + OGLFramebuffer framebuffer; + GLbitfield buffer_bits = GL_NONE; +}; - OGLFramebuffer src_framebuffer; - OGLFramebuffer dst_framebuffer; - std::unordered_map copy_pbo_cache; +struct TextureCacheParams { + static constexpr bool ENABLE_VALIDATION = true; + static constexpr bool FRAMEBUFFER_BLITS = true; + static constexpr bool HAS_EMULATED_COPIES = true; + + using Runtime = OpenGL::TextureCacheRuntime; + using Image = OpenGL::Image; + using ImageAlloc = OpenGL::ImageAlloc; + using ImageView = OpenGL::ImageView; + using Sampler = OpenGL::Sampler; + using Framebuffer = OpenGL::Framebuffer; }; +using TextureCache = VideoCommon::TextureCache; + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index dd4ee3361..cbccfdeb4 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -475,6 +475,19 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) { return GL_FILL; } +inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) { + switch (filter) { + case Tegra::Texture::SamplerReduction::WeightedAverage: + return GL_WEIGHTED_AVERAGE_ARB; + case Tegra::Texture::SamplerReduction::Min: + return GL_MIN; + case Tegra::Texture::SamplerReduction::Max: + return GL_MAX; + } + UNREACHABLE_MSG("Invalid reduction filter={}", static_cast(filter)); + return GL_WEIGHTED_AVERAGE_ARB; +} + inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) { // Enumeration order matches register order. We can convert it arithmetically. 
return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast(swizzle); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index cbfaaa99c..dd77a543c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -23,10 +23,10 @@ #include "core/telemetry_session.h" #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" -#include "video_core/morton.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/textures/decoders.h" namespace OpenGL { @@ -140,11 +140,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!framebuffer) { return; } - PrepareRendertarget(framebuffer); RenderScreenshot(); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + state_tracker.BindFramebuffer(0); DrawScreen(emu_window.GetFramebufferLayout()); ++m_current_frame; @@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; - const auto pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; - const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; - const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; - u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; - rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes); - // TODO(Rodrigo): Read this from HLE constexpr u32 block_height_log2 = 4; - VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, - framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, - 
gl_framebuffer_data.data(), host_ptr); - + const auto pixel_format{ + VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; + const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; + const u64 size_in_bytes{Tegra::Texture::CalculateSize( + true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; + const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; + const std::span input_data(host_ptr, size_in_bytes); + Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, + framebuffer.width, framebuffer.height, 1, block_height_log2, + 0); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(framebuffer.stride)); // Update existing texture @@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() { glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); + // Generate presentation sampler + present_sampler.Create(); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + // Generate VBO handle for drawing vertex_buffer.Create(); @@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); + // Enable seamless cubemaps when per texture parameters are not available + if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { + glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); + } + // Enable unified vertex attributes and query vertex buffer address when the driver supports it if (device.HasVertexBufferUnifiedMemory()) { glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); @@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, const auto pixel_format{ VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; - 
const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; + const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel); GLint internal_format; @@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, internal_format = GL_RGBA8; texture.gl_format = GL_RGBA; texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; - UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", - static_cast(framebuffer.pixel_format)); + // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", + // static_cast(framebuffer.pixel_format)); } texture.resource.Release(); @@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { state_tracker.NotifyPolygonModes(); state_tracker.NotifyViewport0(); state_tracker.NotifyScissor0(); - state_tracker.NotifyColorMask0(); + state_tracker.NotifyColorMask(0); state_tracker.NotifyBlend0(); state_tracker.NotifyFramebuffer(); state_tracker.NotifyFrontFace(); @@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { } glBindTextureUnit(0, screen_info.display_texture); - glBindSampler(0, 0); + glBindSampler(0, present_sampler.handle); glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); @@ -473,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() { DrawScreen(layout); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + glPixelStorei(GL_PACK_ROW_LENGTH, 0); glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, renderer_settings.screenshot_bits); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 376f88766..44e109794 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -102,6 +102,7 @@ private: StateTracker state_tracker{gpu}; // OpenGL object IDs + OGLSampler present_sampler; 
OGLBuffer vertex_buffer; OGLProgram vertex_program; OGLProgram fragment_program; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp new file mode 100644 index 000000000..eb849cbf2 --- /dev/null +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -0,0 +1,224 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include + +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" +#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" +#include "video_core/host_shaders/opengl_copy_bc4_comp.h" +#include "video_core/host_shaders/pitch_unswizzle_comp.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/renderer_opengl/util_shaders.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/accelerated_swizzle.h" +#include "video_core/texture_cache/types.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/decoders.h" + +namespace OpenGL { + +using namespace HostShaders; + +using VideoCommon::Extent3D; +using VideoCommon::ImageCopy; +using VideoCommon::ImageType; +using VideoCommon::SwizzleParameters; +using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams; +using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; +using VideoCore::Surface::BytesPerBlock; + +namespace { + +OGLProgram MakeProgram(std::string_view source) { + OGLShader shader; + shader.Create(source, GL_COMPUTE_SHADER); + + OGLProgram program; + program.Create(true, false, shader.handle); + return program; +} + +} // Anonymous namespace + +UtilShaders::UtilShaders(ProgramManager& program_manager_) + : 
program_manager{program_manager_}, + block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), + block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), + pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), + copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { + const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); + swizzle_table_buffer.Create(); + glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); +} + +UtilShaders::~UtilShaders() = default; + +void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles) { + static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; + static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; + static constexpr GLuint BINDING_INPUT_BUFFER = 1; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + + program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); + glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); + + const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); + for (const SwizzleParameters& swizzle : swizzles) { + const Extent3D num_tiles = swizzle.num_tiles; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; + + const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); + const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); + + const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); + glUniform3uiv(0, 1, params.origin.data()); + glUniform3iv(1, 1, params.destination.data()); + glUniform1ui(2, params.bytes_per_block_log2); + glUniform1ui(3, params.layer_stride); + glUniform1ui(4, params.block_size); + glUniform1ui(5, params.x_shift); + glUniform1ui(6, params.block_height); + glUniform1ui(7, 
params.block_height_mask); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, + GL_WRITE_ONLY, store_format); + glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); + } + program_manager.RestoreGuestCompute(); +} + +void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles) { + static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; + + static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; + static constexpr GLuint BINDING_INPUT_BUFFER = 1; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + + glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); + + const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); + for (const SwizzleParameters& swizzle : swizzles) { + const Extent3D num_tiles = swizzle.num_tiles; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; + + const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); + const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); + const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth); + + const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info); + glUniform3uiv(0, 1, params.origin.data()); + glUniform3iv(1, 1, params.destination.data()); + glUniform1ui(2, params.bytes_per_block_log2); + glUniform1ui(3, params.slice_size); + glUniform1ui(4, params.block_size); + glUniform1ui(5, params.x_shift); + glUniform1ui(6, params.block_height); + glUniform1ui(7, params.block_height_mask); + glUniform1ui(8, 
params.block_depth); + glUniform1ui(9, params.block_depth_mask); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, + GL_WRITE_ONLY, store_format); + glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); + } + program_manager.RestoreGuestCompute(); +} + +void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles) { + static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; + static constexpr GLuint BINDING_INPUT_BUFFER = 0; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + static constexpr GLuint LOC_ORIGIN = 0; + static constexpr GLuint LOC_DESTINATION = 1; + static constexpr GLuint LOC_BYTES_PER_BLOCK = 2; + static constexpr GLuint LOC_PITCH = 3; + + const u32 bytes_per_block = BytesPerBlock(image.info.format); + const GLenum format = StoreFormat(bytes_per_block); + const u32 pitch = image.info.pitch; + + UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), + "Non-power of two images are not implemented"); + + program_manager.BindHostCompute(pitch_unswizzle_program.handle); + glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + glUniform2ui(LOC_ORIGIN, 0, 0); + glUniform2i(LOC_DESTINATION, 0, 0); + glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); + glUniform1ui(LOC_PITCH, pitch); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); + for (const SwizzleParameters& swizzle : swizzles) { + const Extent3D num_tiles = swizzle.num_tiles; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; + + const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); + const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); + + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 
BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); + } + program_manager.RestoreGuestCompute(); +} + +void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span copies) { + static constexpr GLuint BINDING_INPUT_IMAGE = 0; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 1; + static constexpr GLuint LOC_SRC_OFFSET = 0; + static constexpr GLuint LOC_DST_OFFSET = 1; + + program_manager.BindHostCompute(copy_bc4_program.handle); + + for (const ImageCopy& copy : copies) { + ASSERT(copy.src_subresource.base_layer == 0); + ASSERT(copy.src_subresource.num_layers == 1); + ASSERT(copy.dst_subresource.base_layer == 0); + ASSERT(copy.dst_subresource.num_layers == 1); + + glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); + glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); + glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level, + GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); + glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(), + copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); + glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); + } + program_manager.RestoreGuestCompute(); +} + +GLenum StoreFormat(u32 bytes_per_block) { + switch (bytes_per_block) { + case 1: + return GL_R8UI; + case 2: + return GL_R16UI; + case 4: + return GL_R32UI; + case 8: + return GL_RG32UI; + case 16: + return GL_RGBA32UI; + } + UNREACHABLE(); + return GL_R8UI; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h new file mode 100644 index 000000000..359997255 --- /dev/null +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -0,0 +1,51 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the 
license.txt file included. + +#pragma once + +#include + +#include + +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/texture_cache/types.h" + +namespace OpenGL { + +class Image; +class ImageBufferMap; +class ProgramManager; + +class UtilShaders { +public: + explicit UtilShaders(ProgramManager& program_manager); + ~UtilShaders(); + + void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles); + + void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles); + + void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles); + + void CopyBC4(Image& dst_image, Image& src_image, + std::span copies); + +private: + ProgramManager& program_manager; + + OGLBuffer swizzle_table_buffer; + + OGLProgram block_linear_unswizzle_2d_program; + OGLProgram block_linear_unswizzle_3d_program; + OGLProgram pitch_unswizzle_program; + OGLProgram copy_bc4_program; +}; + +GLenum StoreFormat(u32 bytes_per_block); + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp deleted file mode 100644 index 6d7bb16b2..000000000 --- a/src/video_core/renderer_opengl/utils.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include - -#include -#include - -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/renderer_opengl/utils.h" - -namespace OpenGL { - -void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { - if (!GLAD_GL_KHR_debug) { - // We don't need to throw an error as this is just for debugging - return; - } - - std::string object_label; - if (extra_info.empty()) { - switch (identifier) { - case GL_TEXTURE: - object_label = fmt::format("Texture@0x{:016X}", addr); - break; - case GL_PROGRAM: - object_label = fmt::format("Shader@0x{:016X}", addr); - break; - default: - object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr); - break; - } - } else { - object_label = fmt::format("{}@0x{:016X}", extra_info, addr); - } - glObjectLabel(identifier, handle, -1, static_cast(object_label.c_str())); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h deleted file mode 100644 index 9c09ee12c..000000000 --- a/src/video_core/renderer_opengl/utils.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include "common/common_types.h" - -namespace OpenGL { - -void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); - -} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp new file mode 100644 index 000000000..87c8e5693 --- /dev/null +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -0,0 +1,624 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" +#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" +#include "video_core/host_shaders/full_screen_triangle_vert_spv.h" +#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" +#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" +#include "video_core/renderer_vulkan/blit_image.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_state_tracker.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/surface.h" + +namespace Vulkan { + +using VideoCommon::ImageViewType; + +namespace { +struct PushConstants { + std::array tex_scale; + std::array tex_offset; +}; + +template +inline constexpr VkDescriptorSetLayoutBinding TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING{ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = nullptr, +}; +constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{ + TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, + TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<1>, +}; +constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = 1, + .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, +}; +constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = 
static_cast(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()), + .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(), +}; +constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{ + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .offset = 0, + .size = sizeof(PushConstants), +}; +constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = 0, + .pVertexBindingDescriptions = nullptr, + .vertexAttributeDescriptionCount = 0, + .pVertexAttributeDescriptions = nullptr, +}; +constexpr VkPipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + .primitiveRestartEnable = VK_FALSE, +}; +constexpr VkPipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = 1, + .pViewports = nullptr, + .scissorCount = 1, + .pScissors = nullptr, +}; +constexpr VkPipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_BACK_BIT, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, +}; +constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + 
.sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, +}; +constexpr std::array DYNAMIC_STATES{ + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, +}; +constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast(DYNAMIC_STATES.size()), + .pDynamicStates = DYNAMIC_STATES.data(), +}; +constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_CLEAR, + .attachmentCount = 0, + .pAttachments = nullptr, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, +}; +constexpr VkPipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{ + .blendEnable = VK_FALSE, + .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, +}; +constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_CLEAR, + .attachmentCount = 1, + .pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, +}; +constexpr VkPipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + 
.depthTestEnable = VK_TRUE, + .depthWriteEnable = VK_TRUE, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = VK_FALSE, + .stencilTestEnable = VK_FALSE, + .front = VkStencilOpState{}, + .back = VkStencilOpState{}, + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, +}; + +template +inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .magFilter = filter, + .minFilter = filter, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE, + .unnormalizedCoordinates = VK_TRUE, +}; + +constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo( + const VkDescriptorSetLayout* set_layout) { + return VkPipelineLayoutCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = set_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &PUSH_CONSTANT_RANGE, + }; +} + +constexpr VkPipelineShaderStageCreateInfo PipelineShaderStageCreateInfo(VkShaderStageFlagBits stage, + VkShaderModule shader) { + return VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = stage, + .module = shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }; +} + +constexpr std::array MakeStages( + VkShaderModule vertex_shader, VkShaderModule fragment_shader) { + return std::array{ + PipelineShaderStageCreateInfo(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader), + PipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, 
fragment_shader), + }; +} + +void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set, + VkSampler sampler, VkImageView image_view) { + const VkDescriptorImageInfo image_info{ + .sampler = sampler, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + const VkWriteDescriptorSet write_descriptor_set{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr); +} + +void UpdateTwoTexturesDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set, + VkSampler sampler, VkImageView image_view_0, + VkImageView image_view_1) { + const VkDescriptorImageInfo image_info_0{ + .sampler = sampler, + .imageView = image_view_0, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + const VkDescriptorImageInfo image_info_1{ + .sampler = sampler, + .imageView = image_view_1, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + const std::array write_descriptor_sets{ + VkWriteDescriptorSet{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info_0, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }, + VkWriteDescriptorSet{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_set, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info_1, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }, + }; + 
device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr); +} + +void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, + const std::array& dst_region, + const std::array& src_region) { + const VkOffset2D offset{ + .x = std::min(dst_region[0].x, dst_region[1].x), + .y = std::min(dst_region[0].y, dst_region[1].y), + }; + const VkExtent2D extent{ + .width = static_cast(std::abs(dst_region[1].x - dst_region[0].x)), + .height = static_cast(std::abs(dst_region[1].y - dst_region[0].y)), + }; + const VkViewport viewport{ + .x = static_cast(offset.x), + .y = static_cast(offset.y), + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + // TODO: Support scissored blits + const VkRect2D scissor{ + .offset = offset, + .extent = extent, + }; + const float scale_x = static_cast(src_region[1].x - src_region[0].x); + const float scale_y = static_cast(src_region[1].y - src_region[0].y); + const PushConstants push_constants{ + .tex_scale = {scale_x, scale_y}, + .tex_offset = {static_cast(src_region[0].x), static_cast(src_region[0].y)}, + }; + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); +} + +} // Anonymous namespace + +BlitImageHelper::BlitImageHelper(const VKDevice& device_, VKScheduler& scheduler_, + StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) + : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, + one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( + ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), + two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( + TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), + one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), + two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), + 
one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( + PipelineLayoutCreateInfo(one_texture_set_layout.address()))), + two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( + PipelineLayoutCreateInfo(two_textures_set_layout.address()))), + full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), + blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), + convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), + convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), + linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)), + nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)) { + if (device.IsExtShaderStencilExportSupported()) { + blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV); + } +} + +BlitImageHelper::~BlitImageHelper() = default; + +void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation) { + const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear; + const BlitImagePipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .operation = operation, + }; + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkSampler sampler = is_linear ? 
*linear_sampler : *nearest_sampler; + const VkPipeline pipeline = FindOrEmplacePipeline(key); + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, + &device = device](vk::CommandBuffer cmdbuf) { + // TODO: Barriers + UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + BindBlitState(cmdbuf, layout, dst_region, src_region); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + +void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, + VkImageView src_depth_view, VkImageView src_stencil_view, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation) { + ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); + ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy); + + const VkPipelineLayout layout = *two_textures_pipeline_layout; + const VkSampler sampler = *nearest_sampler; + const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); + const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, + src_stencil_view, descriptor_set, + &device = device](vk::CommandBuffer cmdbuf) { + // TODO: Barriers + UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, + src_stencil_view); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + BindBlitState(cmdbuf, 
layout, dst_region, src_region); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + +void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass()); + Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + + ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); + Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass()); + Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass()); + Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkSampler sampler = *nearest_sampler; + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); + const VkExtent2D extent{ + .width = src_image_view.size.width, + .height = src_image_view.size.height, + }; + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, + &device = device](vk::CommandBuffer cmdbuf) { + const VkOffset2D offset{ + .x = 0, + .y = 0, + }; + const VkViewport viewport{ + .x = 
0.0f, + .y = 0.0f, + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .minDepth = 0.0f, + .maxDepth = 0.0f, + }; + const VkRect2D scissor{ + .offset = offset, + .extent = extent, + }; + const PushConstants push_constants{ + .tex_scale = {viewport.width, viewport.height}, + .tex_offset = {0.0f, 0.0f}, + }; + UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); + + // TODO: Barriers + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + +VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) { + const auto it = std::ranges::find(blit_color_keys, key); + if (it != blit_color_keys.end()) { + return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)]; + } + blit_color_keys.push_back(key); + + const std::array stages = MakeStages(*full_screen_vert, *blit_color_to_color_frag); + const VkPipelineColorBlendAttachmentState blend_attachment{ + .blendEnable = VK_FALSE, + .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + // TODO: programmable blending + const VkPipelineColorBlendStateCreateInfo color_blend_create_info{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_CLEAR, + .attachmentCount = 1, + .pAttachments = 
&blend_attachment, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, + }; + blit_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = nullptr, + .pColorBlendState = &color_blend_create_info, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *one_texture_pipeline_layout, + .renderPass = key.renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + })); + return *blit_color_pipelines.back(); +} + +VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) { + if (blit_depth_stencil_pipeline) { + return *blit_depth_stencil_pipeline; + } + const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag); + blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, + 
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *two_textures_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); + return *blit_depth_stencil_pipeline; +} + +void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { + if (pipeline) { + return; + } + const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag); + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = nullptr, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *one_texture_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); +} + +void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { + if (pipeline) { + return; + } + const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag); + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + 
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *one_texture_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h new file mode 100644 index 000000000..2c2790bf9 --- /dev/null +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -0,0 +1,97 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "video_core/engines/fermi_2d.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/texture_cache/types.h" + +namespace Vulkan { + +using VideoCommon::Offset2D; + +class VKDevice; +class VKScheduler; +class StateTracker; + +class Framebuffer; +class ImageView; + +struct BlitImagePipelineKey { + constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default; + + VkRenderPass renderpass; + Tegra::Engines::Fermi2D::Operation operation; +}; + +class BlitImageHelper { +public: + explicit BlitImageHelper(const VKDevice& device, VKScheduler& scheduler, + StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); + ~BlitImageHelper(); + + void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation); + + void BlitDepthStencil(const Framebuffer* 
dst_framebuffer, VkImageView src_depth_view, + VkImageView src_stencil_view, const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation); + + void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + + void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + + void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + + void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + +private: + void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, + const ImageView& src_image_view); + + [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key); + + [[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass); + + void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); + + void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); + + const VKDevice& device; + VKScheduler& scheduler; + StateTracker& state_tracker; + + vk::DescriptorSetLayout one_texture_set_layout; + vk::DescriptorSetLayout two_textures_set_layout; + DescriptorAllocator one_texture_descriptor_allocator; + DescriptorAllocator two_textures_descriptor_allocator; + vk::PipelineLayout one_texture_pipeline_layout; + vk::PipelineLayout two_textures_pipeline_layout; + vk::ShaderModule full_screen_vert; + vk::ShaderModule blit_color_to_color_frag; + vk::ShaderModule blit_depth_stencil_frag; + vk::ShaderModule convert_depth_to_float_frag; + vk::ShaderModule convert_float_to_depth_frag; + vk::Sampler linear_sampler; + vk::Sampler nearest_sampler; + + std::vector blit_color_keys; + std::vector blit_color_pipelines; + vk::Pipeline blit_depth_stencil_pipeline; + vk::Pipeline convert_d32_to_r32_pipeline; + vk::Pipeline convert_r32_to_d32_pipeline; + vk::Pipeline convert_d16_to_r16_pipeline; 
+ vk::Pipeline convert_r16_to_d16_pipeline; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 5ec43db11..67dd10500 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -60,6 +60,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta logic_op.Assign(PackLogicOp(regs.logic_op.operation)); rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); topology.Assign(regs.draw.topology); + msaa_mode.Assign(regs.multisample_mode); raw2 = 0; const auto test_func = diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index c26b77790..7e95e6fce 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -186,6 +186,7 @@ struct FixedPipelineState { BitField<19, 4, u32> logic_op; BitField<23, 1, u32> rasterize_enable; BitField<24, 4, Maxwell::PrimitiveTopology> topology; + BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; }; union { u32 raw2; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 58e117eb3..40501e7fa 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -122,7 +122,7 @@ struct FormatTuple { {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM - {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM + {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5_UNORM {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM 
{VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT @@ -163,7 +163,7 @@ struct FormatTuple { {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT {VK_FORMAT_UNDEFINED}, // R16G16_UINT - {VK_FORMAT_UNDEFINED}, // R16G16_SINT + {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB @@ -233,18 +233,20 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { - tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) - ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 - : VK_FORMAT_A8B8G8R8_UNORM_PACK32; + const bool is_srgb = VideoCore::Surface::IsPixelFormatSRGB(pixel_format); + tuple.format = is_srgb ? 
VK_FORMAT_A8B8G8R8_SRGB_PACK32 : VK_FORMAT_A8B8G8R8_UNORM_PACK32; } const bool attachable = tuple.usage & Attachable; const bool storage = tuple.usage & Storage; VkFormatFeatureFlags usage; - if (format_type == FormatType::Buffer) { + switch (format_type) { + case FormatType::Buffer: usage = VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; - } else { + break; + case FormatType::Linear: + case FormatType::Optimal: usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT; if (attachable) { @@ -254,6 +256,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo if (storage) { usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; } + break; } return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; } @@ -724,4 +727,17 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) return {}; } +VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction) { + switch (reduction) { + case Tegra::Texture::SamplerReduction::WeightedAverage: + return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; + case Tegra::Texture::SamplerReduction::Min: + return VK_SAMPLER_REDUCTION_MODE_MIN_EXT; + case Tegra::Texture::SamplerReduction::Max: + return VK_SAMPLER_REDUCTION_MODE_MAX_EXT; + } + UNREACHABLE_MSG("Invalid sampler mode={}", static_cast(reduction)); + return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; +} + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 7e213452f..1a90f192e 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -61,4 +61,6 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); +VkSamplerReductionMode 
SamplerReduction(Tegra::Texture::SamplerReduction reduction); + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index ea4b7c1e6..7f521cb9b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -92,9 +92,9 @@ Common::DynamicLibrary OpenVulkanLibrary() { return library; } -std::pair CreateInstance( - Common::DynamicLibrary& library, vk::InstanceDispatch& dld, - WindowSystemType window_type = WindowSystemType::Headless, bool enable_layers = false) { +std::pair CreateInstance(Common::DynamicLibrary& library, + vk::InstanceDispatch& dld, WindowSystemType window_type, + bool enable_debug_utils, bool enable_layers) { if (!library.IsOpen()) { LOG_ERROR(Render_Vulkan, "Vulkan library not available"); return {}; @@ -133,7 +133,7 @@ std::pair CreateInstance( if (window_type != Core::Frontend::WindowSystemType::Headless) { extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); } - if (enable_layers) { + if (enable_debug_utils) { extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); @@ -287,7 +287,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { bool RendererVulkan::Init() { library = OpenVulkanLibrary(); std::tie(instance, instance_version) = CreateInstance( - library, dld, render_window.GetWindowInfo().type, Settings::values.renderer_debug); + library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug); if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { return false; } @@ -447,7 +447,8 @@ void RendererVulkan::Report() const { std::vector RendererVulkan::EnumerateDevices() { vk::InstanceDispatch dld; Common::DynamicLibrary library = OpenVulkanLibrary(); - vk::Instance instance = CreateInstance(library, dld).first; + vk::Instance 
instance = + CreateInstance(library, dld, WindowSystemType::Headless, false, false).first; if (!instance) { return {}; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 977b86003..74642fba4 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -33,10 +33,9 @@ class VKDevice; class VKMemoryManager; class VKSwapchain; class VKScheduler; -class VKImage; struct VKScreenInfo { - VKImage* image{}; + VkImageView image_view{}; u32 width{}; u32 height{}; bool is_srgb{}; diff --git a/src/video_core/renderer_vulkan/shaders/blit.frag b/src/video_core/renderer_vulkan/shaders/blit.frag deleted file mode 100644 index a06ecd24a..000000000 --- a/src/video_core/renderer_vulkan/shaders/blit.frag +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - -#version 460 core - -layout (location = 0) in vec2 frag_tex_coord; - -layout (location = 0) out vec4 color; - -layout (binding = 1) uniform sampler2D color_texture; - -void main() { - color = texture(color_texture, frag_tex_coord); -} diff --git a/src/video_core/renderer_vulkan/shaders/blit.vert b/src/video_core/renderer_vulkan/shaders/blit.vert deleted file mode 100644 index c64d9235a..000000000 --- a/src/video_core/renderer_vulkan/shaders/blit.vert +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - -#version 460 core - -layout (location = 0) in vec2 vert_position; -layout (location = 1) in vec2 vert_tex_coord; - -layout (location = 0) out vec2 frag_tex_coord; - -layout (set = 0, binding = 0) uniform MatrixBlock { - mat4 modelview_matrix; -}; - -void main() { - gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0); - frag_tex_coord = vert_tex_coord; -} diff --git a/src/video_core/renderer_vulkan/shaders/quad_array.comp b/src/video_core/renderer_vulkan/shaders/quad_array.comp deleted file mode 100644 index 5a5703308..000000000 --- a/src/video_core/renderer_vulkan/shaders/quad_array.comp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - -#version 460 core - -layout (local_size_x = 1024) in; - -layout (std430, set = 0, binding = 0) buffer OutputBuffer { - uint output_indexes[]; -}; - -layout (push_constant) uniform PushConstants { - uint first; -}; - -void main() { - uint primitive = gl_GlobalInvocationID.x; - if (primitive * 6 >= output_indexes.length()) { - return; - } - - const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3); - for (uint vertex = 0; vertex < 6; ++vertex) { - uint index = first + primitive * 4 + quad_map[vertex]; - output_indexes[primitive * 6 + vertex] = index; - } -} diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp deleted file mode 100644 index 5a472ba9b..000000000 --- 
a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -/* - * Build instructions: - * $ glslangValidator -V quad_indexed.comp -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - -#version 460 core - -layout (local_size_x = 1024) in; - -layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { - uint input_indexes[]; -}; - -layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { - uint output_indexes[]; -}; - -layout (push_constant) uniform PushConstants { - uint base_vertex; - int index_shift; // 0: uint8, 1: uint16, 2: uint32 -}; - -void main() { - int primitive = int(gl_GlobalInvocationID.x); - if (primitive * 6 >= output_indexes.length()) { - return; - } - - int index_size = 8 << index_shift; - int flipped_shift = 2 - index_shift; - int mask = (1 << flipped_shift) - 1; - - const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3); - for (uint vertex = 0; vertex < 6; ++vertex) { - int offset = primitive * 4 + quad_swizzle[vertex]; - int int_offset = offset >> flipped_shift; - int bit_offset = (offset & mask) * index_size; - uint packed_input = input_indexes[int_offset]; - uint index = bitfieldExtract(packed_input, bit_offset, index_size); - output_indexes[primitive * 6 + vertex] = index + base_vertex; - } -} diff --git a/src/video_core/renderer_vulkan/shaders/uint8.comp b/src/video_core/renderer_vulkan/shaders/uint8.comp deleted file mode 100644 index a320f3ae0..000000000 --- a/src/video_core/renderer_vulkan/shaders/uint8.comp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - -#version 460 core -#extension GL_EXT_shader_16bit_storage : require -#extension GL_EXT_shader_8bit_storage : require - -layout (local_size_x = 1024) in; - -layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { - uint8_t input_indexes[]; -}; - -layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { - uint16_t output_indexes[]; -}; - -void main() { - uint id = gl_GlobalInvocationID.x; - if (id < input_indexes.length()) { - output_indexes[id] = uint16_t(input_indexes[id]); - } -} diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index b5b60309e..d3a83f22f 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -16,12 +16,12 @@ #include "core/frontend/emu_window.h" #include "core/memory.h" #include "video_core/gpu.h" -#include "video_core/morton.h" +#include "video_core/host_shaders/vulkan_present_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_vert_spv.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_image.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -29,108 +29,12 @@ #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" +#include "video_core/textures/decoders.h" namespace Vulkan { namespace { -// Generated from the "shaders/" directory, read the instructions there. 
-constexpr u8 blit_vertex_code[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, - 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, - 
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, - 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 
0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x1a, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x3e, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 
0xfd, 0x00, 0x01, 0x00, - 0x38, 0x00, 0x01, 0x00}; - -constexpr u8 blit_fragment_code[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, - 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x03, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 
0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, - 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; - struct ScreenRectVertex { ScreenRectVertex() = default; explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} @@ -173,9 +77,9 @@ constexpr std::array MakeOrthographicMatrix(f32 width, f32 height) { // clang-format on } -std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { +u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { using namespace VideoCore::Surface; - return GetBytesPerPixel(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); + return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); } std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { @@ -239,34 +143,30 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool scheduler.Wait(resource_ticks[image_index]); resource_ticks[image_index] = scheduler.CurrentTick(); - VKImage* blit_image = use_accelerated ? 
screen_info.image : raw_images[image_index].get(); - - UpdateDescriptorSet(image_index, blit_image->GetPresentView()); + UpdateDescriptorSet(image_index, + use_accelerated ? screen_info.image_view : *raw_image_views[image_index]); BufferData data; SetUniformData(data, framebuffer); SetVertexData(data, framebuffer); auto map = buffer_commit->Map(); - std::memcpy(map.GetAddress(), &data, sizeof(data)); + std::memcpy(map.Address(), &data, sizeof(data)); if (!use_accelerated) { const u64 image_offset = GetRawImageOffset(framebuffer, image_index); - const auto pixel_format = - VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; - const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr); - rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer)); + const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); + const size_t size_bytes = GetSizeInBytes(framebuffer); + rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes); // TODO(Rodrigo): Read this from HLE constexpr u32 block_height_log2 = 4; - VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, - framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, - map.GetAddress() + image_offset, host_ptr); - - blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); + Tegra::Texture::UnswizzleTexture( + std::span(map.Address() + image_offset, size_bytes), std::span(host_ptr, size_bytes), + bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); const VkBufferImageCopy copy{ .bufferOffset = image_offset, @@ -288,15 +188,44 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool }, }; scheduler.Record( - [buffer = *buffer, image = *blit_image->GetHandle(), 
copy](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); + [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + VkImageMemoryBarrier read_barrier = base_barrier; + read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + VkImageMemoryBarrier write_barrier = base_barrier; + write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, read_barrier); + cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); }); } map.Release(); - blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], descriptor_set = descriptor_sets[image_index], buffer = *buffer, size = swapchain.GetSize(), pipeline = *pipeline, @@ -304,31 +233,31 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool const VkClearValue clear_color{ .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}}, }; - - 
VkRenderPassBeginInfo renderpass_bi; - renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - renderpass_bi.pNext = nullptr; - renderpass_bi.renderPass = renderpass; - renderpass_bi.framebuffer = framebuffer; - renderpass_bi.renderArea.offset.x = 0; - renderpass_bi.renderArea.offset.y = 0; - renderpass_bi.renderArea.extent = size; - renderpass_bi.clearValueCount = 1; - renderpass_bi.pClearValues = &clear_color; - - VkViewport viewport; - viewport.x = 0.0f; - viewport.y = 0.0f; - viewport.width = static_cast(size.width); - viewport.height = static_cast(size.height); - viewport.minDepth = 0.0f; - viewport.maxDepth = 1.0f; - - VkRect2D scissor; - scissor.offset.x = 0; - scissor.offset.y = 0; - scissor.extent = size; - + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = renderpass, + .framebuffer = framebuffer, + .renderArea = + { + .offset = {0, 0}, + .extent = size, + }, + .clearValueCount = 1, + .pClearValues = &clear_color, + }; + const VkViewport viewport{ + .x = 0.0f, + .y = 0.0f, + .width = static_cast(size.width), + .height = static_cast(size.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + const VkRect2D scissor{ + .offset = {0, 0}, + .extent = size, + }; cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.SetViewport(0, viewport); @@ -372,8 +301,8 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) } void VKBlitScreen::CreateShaders() { - vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code); - fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code); + vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); + fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); } void VKBlitScreen::CreateSemaphores() { @@ -420,7 +349,7 @@ void VKBlitScreen::CreateRenderPass() { const 
VkAttachmentReference color_attachment_ref{ .attachment = 0, - .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + .layout = VK_IMAGE_LAYOUT_GENERAL, }; const VkSubpassDescription subpass_description{ @@ -735,34 +664,56 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { raw_images.resize(image_count); + raw_image_views.resize(image_count); raw_buffer_commits.resize(image_count); - const VkImageCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .imageType = VK_IMAGE_TYPE_2D, - .format = GetFormat(framebuffer), - .extent = - { - .width = framebuffer.width, - .height = framebuffer.height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - }; - - for (std::size_t i = 0; i < image_count; ++i) { - raw_images[i] = std::make_unique(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT); - raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false); + for (size_t i = 0; i < image_count; ++i) { + raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .imageType = VK_IMAGE_TYPE_2D, + .format = GetFormat(framebuffer), + .extent = + { + .width = framebuffer.width, + .height = framebuffer.height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + 
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }); + raw_buffer_commits[i] = memory_manager.Commit(raw_images[i], false); + raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = *raw_images[i], + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = GetFormat(framebuffer), + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); } } @@ -789,7 +740,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag const VkDescriptorImageInfo image_info{ .sampler = *sampler, .imageView = image_view, - .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, }; const VkWriteDescriptorSet sampler_write{ diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 8f2839214..2ee374247 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -35,7 +35,6 @@ struct ScreenInfo; class RasterizerVulkan; class VKDevice; -class VKImage; class VKScheduler; class VKSwapchain; @@ -110,7 +109,8 @@ private: std::vector resource_ticks; std::vector semaphores; - std::vector> raw_images; + std::vector raw_images; + std::vector raw_image_views; std::vector raw_buffer_commits; u32 raw_width = 0; u32 raw_height = 0; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 444d3fb93..10d296c2f 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -31,15 +31,19 @@ constexpr VkAccessFlags 
UPLOAD_ACCESS_BARRIERS = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; +constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = + VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; + std::unique_ptr CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { - return std::make_unique(device, scheduler, BUFFER_USAGE); + return std::make_unique(device, scheduler); } } // Anonymous namespace -Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, +Buffer::Buffer(const VKDevice& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) - : BufferBlock{cpu_addr_, size_}, scheduler{scheduler_}, staging_pool{staging_pool_} { + : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ + staging_pool_} { const VkBufferCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -64,24 +68,39 @@ void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { scheduler.RequestOutsideRenderPassOperationContext(); const VkBuffer handle = Handle(); - scheduler.Record( - [staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); - - const VkBufferMemoryBarrier barrier{ - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = UPLOAD_ACCESS_BARRIERS, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = handle, - .offset = offset, - .size = data_size, - }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, - barrier, {}); - }); + scheduler.Record([staging = 
*staging.handle, handle, offset, data_size, + &device = device](vk::CommandBuffer cmdbuf) { + const VkBufferMemoryBarrier read_barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = + VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | + VK_ACCESS_HOST_WRITE_BIT | + (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0), + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = data_size, + }; + const VkBufferMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = UPLOAD_ACCESS_BARRIERS, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = data_size, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, read_barrier); + cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, + write_barrier); + }); } void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { @@ -150,8 +169,10 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, const VKDevice& device_, VKMemoryManager& memory_manager_, - VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_) - : BufferCache{rasterizer_, gpu_memory_, cpu_memory_, CreateStreamBuffer(device_, scheduler_)}, + VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_, + VKStagingBufferPool& staging_pool_) + : VideoCommon::BufferCache{rasterizer_, gpu_memory_, + cpu_memory_, 
stream_buffer_}, device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ staging_pool_} {} diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 6008b8373..daf498222 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -41,6 +41,7 @@ public: } private: + const VKDevice& device; VKScheduler& scheduler; VKStagingBufferPool& staging_pool; @@ -49,10 +50,11 @@ private: class VKBufferCache final : public VideoCommon::BufferCache { public: - explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - const VKDevice& device_, VKMemoryManager& memory_manager_, - VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_); + explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStreamBuffer& stream_buffer, + VKStagingBufferPool& staging_pool); ~VKBufferCache(); BufferInfo GetEmptyBuffer(std::size_t size) override; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 1ac7e2a30..2c030e910 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -10,6 +10,9 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" +#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h" +#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" +#include "video_core/host_shaders/vulkan_uint8_comp_spv.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_device.h" @@ -22,99 +25,6 @@ namespace Vulkan { 
namespace { -// Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there. -constexpr u8 quad_array[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, - 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 
0x00, - 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, - 0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x06, 
0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, - 0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, - 0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, - 0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00, - 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 
0x00, 0x1a, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, - 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, - 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, - 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, - 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, - 0x3d, 
0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00, - 0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, - 0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; - VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { return { .binding = 0, @@ -144,208 +54,6 @@ VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { }; } -// Uint8 SPIR-V module. Generated from the "shaders/" directory. 
-constexpr u8 uint8_pass[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, - 0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, - 0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f, - 0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, - 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, - 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 
0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, - 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, - 0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, - 0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 
0x00, 0x05, 0x00, - 0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; - -// Quad indexed SPIR-V module. Generated from the "shaders/" directory. 
-constexpr u8 QUAD_INDEXED_SPV[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, - 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 
0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 
0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, - 0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, - 0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 
0x71, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, - 0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00, - 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, - 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x3d, 0x00, 
0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, - 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, - 0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, - 0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, - 0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, - 0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, - 0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, - 0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 
0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, - 0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00, - 0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, - 0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, - 0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, - 0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00, - 0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, - 0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; - std::array BuildInputOutputDescriptorSetBindings() { return {{ { @@ -381,8 +89,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { VKComputePass::VKComputePass(const VKDevice& device, 
VKDescriptorPool& descriptor_pool, vk::Span bindings, vk::Span templates, - vk::Span push_constants, std::size_t code_size, - const u8* code) { + vk::Span push_constants, + std::span code) { descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -390,7 +98,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto .bindingCount = bindings.size(), .pBindings = bindings.data(), }); - layout = device.GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -400,7 +107,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto .pushConstantRangeCount = push_constants.size(), .pPushConstantRanges = push_constants.data(), }); - if (!templates.empty()) { descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, @@ -417,18 +123,13 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); } - - auto code_copy = std::make_unique(code_size / sizeof(u32) + 1); - std::memcpy(code_copy.get(), code, code_size); - module = device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .codeSize = code_size, - .pCode = code_copy.get(), + .codeSize = static_cast(code.size_bytes()), + .pCode = code.data(), }); - pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, @@ -467,7 +168,7 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, VKUpdateDescriptorQueue& update_descriptor_queue_) : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), BuildQuadArrayPassDescriptorUpdateTemplateEntry(), - 
BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), + BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -510,12 +211,11 @@ std::pair QuadArrayPass::Assemble(u32 num_vertices, u32 return {*buffer.handle, 0}; } -Uint8Pass::Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, +Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), - uint8_pass), + : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), + BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -561,8 +261,7 @@ QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler VKUpdateDescriptorQueue& update_descriptor_queue_) : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), BuildInputOutputDescriptorUpdateTemplate(), - BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), - QUAD_INDEXED_SPV), + BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 2dc87902c..abdf61e2c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ 
b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include "common/common_types.h" @@ -24,8 +25,7 @@ public: explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, vk::Span bindings, vk::Span templates, - vk::Span push_constants, std::size_t code_size, - const u8* code); + vk::Span push_constants, std::span code); ~VKComputePass(); protected: diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index ce3846195..370a63f74 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -46,6 +46,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, + VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -122,6 +123,7 @@ std::unordered_map GetFormatProperties( VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16_SFLOAT, + VK_FORMAT_R16G16_SINT, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UINT, VK_FORMAT_R8G8B8A8_SRGB, @@ -161,18 +163,32 @@ std::unordered_map GetFormatProperties( VK_FORMAT_BC2_SRGB_BLOCK, VK_FORMAT_BC3_SRGB_BLOCK, VK_FORMAT_BC7_SRGB_BLOCK, + VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, - VK_FORMAT_ASTC_8x8_SRGB_BLOCK, - VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK, VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK, - VK_FORMAT_ASTC_10x8_UNORM_BLOCK, - VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, + VK_FORMAT_ASTC_6x5_SRGB_BLOCK, VK_FORMAT_ASTC_6x6_UNORM_BLOCK, VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x5_UNORM_BLOCK, + VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK, + 
VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x8_UNORM_BLOCK, + VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x5_UNORM_BLOCK, + VK_FORMAT_ASTC_10x5_SRGB_BLOCK, + VK_FORMAT_ASTC_10x6_UNORM_BLOCK, + VK_FORMAT_ASTC_10x6_SRGB_BLOCK, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK, + VK_FORMAT_ASTC_10x8_SRGB_BLOCK, VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x10_UNORM_BLOCK, + VK_FORMAT_ASTC_12x10_SRGB_BLOCK, VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK, VK_FORMAT_ASTC_8x6_UNORM_BLOCK, @@ -192,7 +208,7 @@ std::unordered_map GetFormatProperties( VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) - : dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, + : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} { SetupFamilies(surface); SetupFeatures(); @@ -214,7 +230,7 @@ bool VKDevice::Create() { features2.features = { .robustBufferAccess = false, .fullDrawIndexUint32 = false, - .imageCubeArray = false, + .imageCubeArray = true, .independentBlend = true, .geometryShader = true, .tessellationShader = true, @@ -242,7 +258,7 @@ bool VKDevice::Create() { .shaderTessellationAndGeometryPointSize = false, .shaderImageGatherExtended = true, .shaderStorageImageExtendedFormats = false, - .shaderStorageImageMultisample = false, + .shaderStorageImageMultisample = true, .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, .shaderStorageImageWriteWithoutFormat = true, .shaderUniformBufferArrayDynamicIndexing = false, @@ -268,7 +284,6 @@ bool VKDevice::Create() { .variableMultisampleRate = false, .inheritedQueries = false, }; - VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, 
.pNext = nullptr, @@ -380,6 +395,20 @@ bool VKDevice::Create() { LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceRobustness2FeaturesEXT robustness2; + if (ext_robustness2) { + robustness2 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, + .pNext = nullptr, + .robustBufferAccess2 = false, + .robustImageAccess2 = true, + .nullDescriptor = true, + }; + SetNext(next, robustness2); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support robustness2"); + } + if (!ext_depth_range_unrestricted) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } @@ -405,7 +434,14 @@ bool VKDevice::Create() { } CollectTelemetryParameters(); + CollectToolingInfo(); + if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { + LOG_WARNING( + Render_Vulkan, + "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); + ext_extended_dynamic_state = false; + } if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) { // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it // seems to cause stability issues @@ -458,7 +494,7 @@ void VKDevice::ReportLoss() const { LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); // Wait for the log to flush and for Nsight Aftermath to dump the results - std::this_thread::sleep_for(std::chrono::seconds{3}); + std::this_thread::sleep_for(std::chrono::seconds{15}); } void VKDevice::SaveShader(const std::vector& spirv) const { @@ -499,6 +535,16 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) return true; } +bool VKDevice::TestDepthStencilBlits() const { + static constexpr VkFormatFeatureFlags required_features = + VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + const auto test_features = [](VkFormatProperties props) { + return (props.optimalTilingFeatures & required_features) == required_features; + }; + return 
test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) && + test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT)); +} + bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const { const auto it = format_properties.find(wanted_format); @@ -569,6 +615,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { const auto features{physical.GetFeatures()}; const std::array feature_report = { std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), + std::make_pair(features.imageCubeArray, "imageCubeArray"), std::make_pair(features.independentBlend, "independentBlend"), std::make_pair(features.depthClamp, "depthClamp"), std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), @@ -580,6 +627,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), + std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), }; @@ -608,6 +656,7 @@ std::vector VKDevice::LoadExtensions() { bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; + bool has_ext_robustness2{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -627,11 +676,15 @@ std::vector VKDevice::LoadExtensions() { test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, 
VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); + test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true); test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); + test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); + test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); + test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false); if (instance_version >= VK_API_VERSION_1_1) { test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); } @@ -733,6 +786,18 @@ std::vector VKDevice::LoadExtensions() { } } + if (has_ext_robustness2) { + VkPhysicalDeviceRobustness2FeaturesEXT robustness2; + robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + robustness2.pNext = nullptr; + features.pNext = &robustness2; + physical.GetFeatures2KHR(features); + if (robustness2.nullDescriptor && robustness2.robustImageAccess2) { + extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); + ext_robustness2 = true; + } + } + return extensions; } @@ -764,6 +829,7 @@ void VKDevice::SetupFamilies(VkSurfaceKHR surface) { void VKDevice::SetupFeatures() { const auto supported_features{physical.GetFeatures()}; is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; + is_blit_depth_stencil_supported = TestDepthStencilBlits(); is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); } @@ -794,6 +860,32 @@ void VKDevice::CollectTelemetryParameters() { } } +void VKDevice::CollectToolingInfo() { + if (!ext_tooling_info) { + return; + } + const auto vkGetPhysicalDeviceToolPropertiesEXT = + reinterpret_cast( + 
dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT")); + if (!vkGetPhysicalDeviceToolPropertiesEXT) { + return; + } + u32 tool_count = 0; + if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) { + return; + } + std::vector tools(tool_count); + if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) { + return; + } + for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) { + const std::string_view name = tool.name; + LOG_INFO(Render_Vulkan, "{}", name); + has_renderdoc = has_renderdoc || name == "RenderDoc"; + has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics"; + } +} + std::vector VKDevice::GetDeviceQueueCreateInfos() const { static constexpr float QUEUE_PRIORITY = 1.0f; diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 4286673d9..995dcfc0f 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -157,6 +157,11 @@ public: return is_formatless_image_load_supported; } + /// Returns true when blitting from and to depth stencil images is supported. + bool IsBlitDepthStencilSupported() const { + return is_blit_depth_stencil_supported; + } + /// Returns true if the device supports VK_NV_viewport_swizzle. bool IsNvViewportSwizzleSupported() const { return nv_viewport_swizzle; @@ -172,6 +177,11 @@ public: return ext_index_type_uint8; } + /// Returns true if the device supports VK_EXT_sampler_filter_minmax. + bool IsExtSamplerFilterMinmaxSupported() const { + return ext_sampler_filter_minmax; + } + /// Returns true if the device supports VK_EXT_depth_range_unrestricted. bool IsExtDepthRangeUnrestrictedSupported() const { return ext_depth_range_unrestricted; @@ -197,6 +207,16 @@ public: return ext_extended_dynamic_state; } + /// Returns true if the device supports VK_EXT_shader_stencil_export. 
+ bool IsExtShaderStencilExportSupported() const { + return ext_shader_stencil_export; + } + + /// Returns true when a known debugging tool is attached. + bool HasDebuggingToolAttached() const { + return has_renderdoc || has_nsight_graphics; + } + /// Returns the vendor name reported from Vulkan. std::string_view GetVendorName() const { return vendor_name; @@ -228,16 +248,23 @@ private: /// Collects telemetry information from the device. void CollectTelemetryParameters(); + /// Collects information about attached tools. + void CollectToolingInfo(); + /// Returns a list of queue initialization descriptors. std::vector GetDeviceQueueCreateInfos() const; /// Returns true if ASTC textures are natively supported. bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; + /// Returns true if the device natively supports blitting depth stencil images. + bool TestDepthStencilBlits() const; + /// Returns true if a format is supported. bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; + VkInstance instance; ///< Vulkan instance. vk::DeviceDispatch dld; ///< Device function pointers. vk::PhysicalDevice physical; ///< Physical device. VkPhysicalDeviceProperties properties; ///< Device properties. @@ -253,15 +280,22 @@ private: bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. + bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. + bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. 
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. + bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_robustness2{}; ///< Support for VK_EXT_robustness2. + bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached // Asynchronous Graphics Pipeline setting bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 0bcaee714..774a12a53 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -73,10 +73,9 @@ bool InnerFence::IsEventSignalled() const { } VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, - Tegra::MemoryManager& memory_manager_, - VKTextureCache& texture_cache_, VKBufferCache& buffer_cache_, - VKQueryCache& query_cache_, const VKDevice& device_, - VKScheduler& scheduler_) + Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, + VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, + const VKDevice& device_, VKScheduler& scheduler_) : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, device{device_}, scheduler{scheduler_} {} diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h 
b/src/video_core/renderer_vulkan/vk_fence_manager.h index c8547cc24..c2869e8e3 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -8,6 +8,7 @@ #include "video_core/fence_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Core { @@ -24,7 +25,6 @@ class VKBufferCache; class VKDevice; class VKQueryCache; class VKScheduler; -class VKTextureCache; class InnerFence : public VideoCommon::FenceBase { public: @@ -51,12 +51,12 @@ private: using Fence = std::shared_ptr; using GenericFenceManager = - VideoCommon::FenceManager; + VideoCommon::FenceManager; class VKFenceManager final : public GenericFenceManager { public: explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, - Tegra::MemoryManager& memory_manager_, VKTextureCache& texture_cache_, + Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, const VKDevice& device_, VKScheduler& scheduler_); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 970979fa1..7979df3a8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -15,7 +15,6 @@ #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -69,23 +68,45 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { }; } +VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode 
msaa_mode) { + switch (msaa_mode) { + case Tegra::Texture::MsaaMode::Msaa1x1: + return VK_SAMPLE_COUNT_1_BIT; + case Tegra::Texture::MsaaMode::Msaa2x1: + case Tegra::Texture::MsaaMode::Msaa2x1_D3D: + return VK_SAMPLE_COUNT_2_BIT; + case Tegra::Texture::MsaaMode::Msaa2x2: + case Tegra::Texture::MsaaMode::Msaa2x2_VC4: + case Tegra::Texture::MsaaMode::Msaa2x2_VC12: + return VK_SAMPLE_COUNT_4_BIT; + case Tegra::Texture::MsaaMode::Msaa4x2: + case Tegra::Texture::MsaaMode::Msaa4x2_D3D: + case Tegra::Texture::MsaaMode::Msaa4x2_VC8: + case Tegra::Texture::MsaaMode::Msaa4x2_VC24: + return VK_SAMPLE_COUNT_8_BIT; + case Tegra::Texture::MsaaMode::Msaa4x4: + return VK_SAMPLE_COUNT_16_BIT; + default: + UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast(msaa_mode)); + return VK_SAMPLE_COUNT_1_BIT; + } +} + } // Anonymous namespace VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKRenderPassCache& renderpass_cache_, - const GraphicsPipelineCacheKey& key_, - vk::Span bindings_, - const SPIRVProgram& program_) - : device{device_}, scheduler{scheduler_}, cache_key{key_}, hash{cache_key.Hash()}, - descriptor_set_layout{CreateDescriptorSetLayout(bindings_)}, + const GraphicsPipelineCacheKey& key, + vk::Span bindings, + const SPIRVProgram& program, u32 num_color_buffers) + : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, + descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate(program_)}, modules{CreateShaderModules( - program_)}, - renderpass{renderpass_cache_.GetRenderPass(cache_key.renderpass_params)}, - pipeline{CreatePipeline(cache_key.renderpass_params, program_)} {} + 
descriptor_template{CreateDescriptorUpdateTemplate(program)}, + modules(CreateShaderModules(program)), + pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} VKGraphicsPipeline::~VKGraphicsPipeline() = default; @@ -179,8 +200,9 @@ std::vector VKGraphicsPipeline::CreateShaderModules( return shader_modules; } -vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, - const SPIRVProgram& program) const { +vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, + VkRenderPass renderpass, + u32 num_color_buffers) const { const auto& state = cache_key.fixed_state; const auto& viewport_swizzles = state.viewport_swizzles; @@ -290,8 +312,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa }; std::array swizzles; - std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(), - UnpackViewportSwizzle); + std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, @@ -326,7 +347,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), .sampleShadingEnable = VK_FALSE, .minSampleShading = 0.0f, .pSampleMask = nullptr, @@ -352,8 +373,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa }; std::array cb_attachments; - const auto num_attachments = static_cast(renderpass_params.num_color_attachments); - for (std::size_t index = 0; index < num_attachments; ++index) { + for (std::size_t index = 0; index < num_color_buffers; ++index) { static constexpr std::array COMPONENT_TABLE{ VK_COLOR_COMPONENT_R_BIT, 
VK_COLOR_COMPONENT_G_BIT, @@ -387,7 +407,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa .flags = 0, .logicOpEnable = VK_FALSE, .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = static_cast(num_attachments), + .attachmentCount = num_color_buffers, .pAttachments = cb_attachments.data(), .blendConstants = {}, }; @@ -447,8 +467,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa stage_ci.pNext = &subgroup_size_ci; } } - - const VkGraphicsPipelineCreateInfo ci{ + return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -468,8 +487,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa .subpass = 0, .basePipelineHandle = nullptr, .basePipelineIndex = 0, - }; - return device.GetLogical().CreateGraphicsPipeline(ci); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 3fb31d55a..214d06b4c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -8,10 +8,10 @@ #include #include +#include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -20,8 +20,7 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct GraphicsPipelineCacheKey { - RenderPassParams renderpass_params; - u32 padding; + VkRenderPass renderpass; std::array shaders; FixedPipelineState fixed_state; @@ -34,7 +33,7 @@ struct GraphicsPipelineCacheKey { } std::size_t Size() const noexcept { - return 
sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); + return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); } }; static_assert(std::has_unique_object_representations_v); @@ -43,7 +42,6 @@ static_assert(std::is_trivially_constructible_v); class VKDescriptorPool; class VKDevice; -class VKRenderPassCache; class VKScheduler; class VKUpdateDescriptorQueue; @@ -52,12 +50,11 @@ using SPIRVProgram = std::array, Maxwell::MaxShaderSt class VKGraphicsPipeline final { public: explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKRenderPassCache& renderpass_cache_, - const GraphicsPipelineCacheKey& key_, - vk::Span bindings_, - const SPIRVProgram& program_); + const GraphicsPipelineCacheKey& key, + vk::Span bindings, + const SPIRVProgram& program, u32 num_color_buffers); ~VKGraphicsPipeline(); VkDescriptorSet CommitDescriptorSet(); @@ -70,10 +67,6 @@ public: return *layout; } - VkRenderPass GetRenderPass() const { - return renderpass; - } - GraphicsPipelineCacheKey GetCacheKey() const { return cache_key; } @@ -89,8 +82,8 @@ private: std::vector CreateShaderModules(const SPIRVProgram& program) const; - vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params, - const SPIRVProgram& program) const; + vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, + u32 num_color_buffers) const; const VKDevice& device; VKScheduler& scheduler; @@ -104,7 +97,6 @@ private: vk::DescriptorUpdateTemplateKHR descriptor_template; std::vector modules; - VkRenderPass renderpass; vk::Pipeline pipeline; }; diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp deleted file mode 100644 index 072d14e3b..000000000 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2018 yuzu 
Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include "common/assert.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_image.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -VKImage::VKImage(const VKDevice& device_, VKScheduler& scheduler_, - const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_) - : device{device_}, scheduler{scheduler_}, format{image_ci_.format}, aspect_mask{aspect_mask_}, - image_num_layers{image_ci_.arrayLayers}, image_num_levels{image_ci_.mipLevels} { - UNIMPLEMENTED_IF_MSG(image_ci_.queueFamilyIndexCount != 0, - "Queue family tracking is not implemented"); - - image = device_.GetLogical().CreateImage(image_ci_); - - const u32 num_ranges = image_num_layers * image_num_levels; - barriers.resize(num_ranges); - subrange_states.resize(num_ranges, {{}, image_ci_.initialLayout}); -} - -VKImage::~VKImage() = default; - -void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, - VkImageLayout new_layout) { - if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) { - return; - } - - std::size_t cursor = 0; - for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) { - for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) { - const u32 layer = base_layer + layer_it; - const u32 level = base_level + level_it; - auto& state = GetSubrangeState(layer, level); - auto& barrier = barriers[cursor]; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = state.access; - barrier.dstAccessMask = new_access; - barrier.oldLayout = state.layout; - barrier.newLayout = new_layout; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - 
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.image = *image; - barrier.subresourceRange.aspectMask = aspect_mask; - barrier.subresourceRange.baseMipLevel = level; - barrier.subresourceRange.levelCount = 1; - barrier.subresourceRange.baseArrayLayer = layer; - barrier.subresourceRange.layerCount = 1; - state.access = new_access; - state.layout = new_layout; - } - } - - scheduler.RequestOutsideRenderPassOperationContext(); - - scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) { - // TODO(Rodrigo): Implement a way to use the latest stage across subresources. - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {}, - vk::Span(barriers.data(), cursor)); - }); -} - -bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkAccessFlags new_access, VkImageLayout new_layout) noexcept { - const bool is_full_range = base_layer == 0 && num_layers == image_num_layers && - base_level == 0 && num_levels == image_num_levels; - if (!is_full_range) { - state_diverged = true; - } - - if (!state_diverged) { - auto& state = GetSubrangeState(0, 0); - if (state.access != new_access || state.layout != new_layout) { - return true; - } - } - - for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) { - for (u32 level_it = 0; level_it < num_levels; ++level_it) { - const u32 layer = base_layer + layer_it; - const u32 level = base_level + level_it; - auto& state = GetSubrangeState(layer, level); - if (state.access != new_access || state.layout != new_layout) { - return true; - } - } - } - return false; -} - -void VKImage::CreatePresentView() { - // Image type has to be 2D to be presented. 
- present_view = device.GetLogical().CreateImageView({ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = *image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = format, - .components = - { - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, - }, - .subresourceRange = - { - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }); -} - -VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { - return subrange_states[static_cast(layer * image_num_levels) + - static_cast(level)]; -} - -} // namespace Vulkan \ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h deleted file mode 100644 index 287ab90ca..000000000 --- a/src/video_core/renderer_vulkan/vk_image.h +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -class VKDevice; -class VKScheduler; - -class VKImage { -public: - explicit VKImage(const VKDevice& device_, VKScheduler& scheduler_, - const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_); - ~VKImage(); - - /// Records in the passed command buffer an image transition and updates the state of the image. - void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, - VkImageLayout new_layout); - - /// Returns a view compatible with presentation, the image has to be 2D. 
- VkImageView GetPresentView() { - if (!present_view) { - CreatePresentView(); - } - return *present_view; - } - - /// Returns the Vulkan image handler. - const vk::Image& GetHandle() const { - return image; - } - - /// Returns the Vulkan format for this image. - VkFormat GetFormat() const { - return format; - } - - /// Returns the Vulkan aspect mask. - VkImageAspectFlags GetAspectMask() const { - return aspect_mask; - } - -private: - struct SubrangeState final { - VkAccessFlags access = 0; ///< Current access bits. - VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout. - }; - - bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkAccessFlags new_access, VkImageLayout new_layout) noexcept; - - /// Creates a presentation view. - void CreatePresentView(); - - /// Returns the subrange state for a layer and layer. - SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept; - - const VKDevice& device; ///< Device handler. - VKScheduler& scheduler; ///< Device scheduler. - - const VkFormat format; ///< Vulkan format. - const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask. - const u32 image_num_layers; ///< Number of layers. - const u32 image_num_levels; ///< Number of mipmap levels. - - vk::Image image; ///< Image handle. - vk::ImageView present_view; ///< Image view compatible with presentation. - - std::vector barriers; ///< Pool of barriers. - std::vector subrange_states; ///< Current subrange state. - - bool state_diverged = false; ///< True when subresources mismatch in layout. 
-}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index be53d450f..56b24b70f 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -216,7 +216,7 @@ VKMemoryCommitImpl::~VKMemoryCommitImpl() { } MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { - return MemoryMap{this, memory.Map(interval.first + offset_, size)}; + return MemoryMap(this, std::span(memory.Map(interval.first + offset_, size), size)); } void VKMemoryCommitImpl::Unmap() const { diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 39f903ec8..318f8b43e 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include "common/common_types.h" @@ -93,8 +94,8 @@ private: /// Holds ownership of a memory map. class MemoryMap final { public: - explicit MemoryMap(const VKMemoryCommitImpl* commit_, u8* address_) - : commit{commit_}, address{address_} {} + explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span span_) + : commit{commit_}, span{span_} {} ~MemoryMap() { if (commit) { @@ -108,19 +109,24 @@ public: commit = nullptr; } + /// Returns a span to the memory map. + [[nodiscard]] std::span Span() const noexcept { + return span; + } + /// Returns the address of the memory map. - u8* GetAddress() const { - return address; + [[nodiscard]] u8* Address() const noexcept { + return span.data(); } /// Returns the address of the memory map; - operator u8*() const { - return address; + [[nodiscard]] operator u8*() const noexcept { + return span.data(); } private: const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. - u8* address{}; ///< Address to the mapped memory. + std::span span; ///< Address to the mapped memory. 
}; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3fb264d03..083796d05 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -8,6 +8,7 @@ #include #include "common/bit_cast.h" +#include "common/cityhash.h" #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" @@ -22,7 +23,6 @@ #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -52,7 +52,9 @@ constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEX constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ - VideoCommon::Shader::CompileDepth::FullDecompile}; + .depth = VideoCommon::Shader::CompileDepth::FullDecompile, + .disable_else_derivation = true, +}; constexpr std::size_t GetStageFromProgram(std::size_t program) { return program == 0 ? 
0 : program - 1; @@ -149,12 +151,11 @@ VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_ Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - VKRenderPassCache& renderpass_cache_) - : ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, - gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, - descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, - renderpass_cache{renderpass_cache_} {} + VKUpdateDescriptorQueue& update_descriptor_queue_) + : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, + scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ + update_descriptor_queue_} {} VKPipelineCache::~VKPipelineCache() = default; @@ -199,7 +200,8 @@ std::array VKPipelineCache::GetShaders() { } VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( - const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { + const GraphicsPipelineCacheKey& key, u32 num_color_buffers, + VideoCommon::Shader::AsyncShaders& async_shaders) { MICROPROFILE_SCOPE(Vulkan_PipelineCache); if (last_graphics_pipeline && last_graphics_key == key) { @@ -215,8 +217,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); const auto [program, bindings] = DecompileShaders(key.fixed_state); async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, - update_descriptor_queue, renderpass_cache, bindings, - program, key); + update_descriptor_queue, bindings, program, key, + num_color_buffers); } last_graphics_pipeline = pair->second.get(); return last_graphics_pipeline; @@ -229,8 +231,8 @@ 
VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); const auto [program, bindings] = DecompileShaders(key.fixed_state); entry = std::make_unique(device, scheduler, descriptor_pool, - update_descriptor_queue, renderpass_cache, key, - bindings, program); + update_descriptor_queue, key, bindings, + program, num_color_buffers); gpu.ShaderNotify().MarkShaderComplete(); } last_graphics_pipeline = entry.get(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 9e1f8fcbb..fbaa8257c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -19,7 +19,6 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/async_shaders.h" @@ -119,18 +118,18 @@ private: class VKPipelineCache final : public VideoCommon::ShaderCache { public: - explicit VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, - VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - VKRenderPassCache& renderpass_cache_); + explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const VKDevice& device, + VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue); ~VKPipelineCache() override; std::array 
GetShaders(); VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, + u32 num_color_buffers, VideoCommon::Shader::AsyncShaders& async_shaders); VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); @@ -153,7 +152,6 @@ private: VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; - VKRenderPassCache& renderpass_cache; std::unique_ptr null_shader; std::unique_ptr null_kernel; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f93986aab..04c5c859c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -19,6 +19,7 @@ #include "core/settings.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -30,8 +31,6 @@ #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_sampler_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" @@ -39,10 +38,13 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader_cache.h" +#include "video_core/texture_cache/texture_cache.h" namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using VideoCommon::ImageViewId; +using VideoCommon::ImageViewType; MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for 
worker", MP_RGB(255, 192, 192)); MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); @@ -58,9 +60,9 @@ MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192 namespace { -constexpr auto ComputeShaderIndex = static_cast(Tegra::Engines::ShaderType::Compute); +constexpr auto COMPUTE_SHADER_INDEX = static_cast(Tegra::Engines::ShaderType::Compute); -VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { +VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, size_t index) { const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; const float height = src.scale_y * 2.0f; @@ -83,7 +85,7 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si return viewport; } -VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { +VkRect2D GetScissorState(const Maxwell& regs, size_t index) { const auto& src = regs.scissor_test[index]; VkRect2D scissor; if (src.enable) { @@ -103,98 +105,122 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { std::array GetShaderAddresses( const std::array& shaders) { std::array addresses; - for (std::size_t i = 0; i < std::size(addresses); ++i) { + for (size_t i = 0; i < std::size(addresses); ++i) { addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; } return addresses; } -void TransitionImages(const std::vector& views, VkPipelineStageFlags pipeline_stage, - VkAccessFlags access) { - for (auto& [view, layout] : views) { - view->Transition(*layout, pipeline_stage, access); +struct TextureHandle { + constexpr TextureHandle(u32 data, bool via_header_index) { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? 
image : handle.tsc_id.Value(); } -} + + u32 image; + u32 sampler; +}; template -Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - std::size_t stage, std::size_t index = 0) { - const auto stage_type = static_cast(stage); +TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, + size_t stage, size_t index = 0) { + const auto shader_type = static_cast(stage); if constexpr (std::is_same_v) { if (entry.is_separated) { const u32 buffer_1 = entry.buffer; const u32 buffer_2 = entry.secondary_buffer; const u32 offset_1 = entry.offset; const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2); - return engine.GetTextureInfo(Tegra::Texture::TextureHandle{handle_1 | handle_2}); + const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); + const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); + return TextureHandle(handle_1 | handle_2, via_header_index); } } if (entry.is_bindless) { - const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); - return engine.GetTextureInfo(Tegra::Texture::TextureHandle{tex_handle}); - } - const auto& gpu_profile = engine.AccessGuestDriverProfile(); - const u32 entry_offset = static_cast(index * gpu_profile.GetTextureHandlerSize()); - const u32 offset = entry.offset + entry_offset; - if constexpr (std::is_same_v) { - return engine.GetStageTexture(stage_type, offset); - } else { - return engine.GetTexture(offset); - } -} - -/// @brief Determine if an attachment to be updated has to preserve contents -/// @param is_clear True when a clear is being executed -/// @param regs 3D registers -/// @return True when the contents have to be preserved -bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) { - if (!is_clear) { - 
return true; - } - // First we have to make sure all clear masks are enabled. - if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B || - !regs.clear_buffers.A) { - return true; - } - // If scissors are disabled, the whole screen is cleared - if (!regs.clear_flags.scissor) { - return false; + const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); + return TextureHandle(raw, via_header_index); } - // Then we have to confirm scissor testing clears the whole image - const std::size_t index = regs.clear_buffers.RT; - const auto& scissor = regs.scissor_test[0]; - return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width || - scissor.max_y < regs.rt[index].height; + const u32 buffer = engine.GetBoundBuffer(); + const u64 offset = (entry.offset + index) * sizeof(u32); + return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); } -/// @brief Determine if an attachment to be updated has to preserve contents -/// @param is_clear True when a clear is being executed -/// @param regs 3D registers -/// @return True when the contents have to be preserved -bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) { - // If we are not clearing, the contents have to be preserved - if (!is_clear) { - return true; - } - // For depth stencil clears we only have to confirm scissor test covers the whole image - if (!regs.clear_flags.scissor) { - return false; - } - // Make sure the clear cover the whole image - const auto& scissor = regs.scissor_test[0]; - return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width || - scissor.max_y < regs.zeta_height; -} - -template +template std::array ExpandStrides(const std::array& strides) { std::array expanded; std::copy(strides.begin(), strides.end(), expanded.begin()); return expanded; } +ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { + if (entry.is_buffer) { + return 
ImageViewType::e2D; + } + switch (entry.type) { + case Tegra::Shader::TextureType::Texture1D: + return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; + case Tegra::Shader::TextureType::Texture2D: + return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; + case Tegra::Shader::TextureType::Texture3D: + return ImageViewType::e3D; + case Tegra::Shader::TextureType::TextureCube: + return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; + } + UNREACHABLE(); + return ImageViewType::e2D; +} + +ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { + switch (entry.type) { + case Tegra::Shader::ImageType::Texture1D: + return ImageViewType::e1D; + case Tegra::Shader::ImageType::Texture1DArray: + return ImageViewType::e1DArray; + case Tegra::Shader::ImageType::Texture2D: + return ImageViewType::e2D; + case Tegra::Shader::ImageType::Texture2DArray: + return ImageViewType::e2DArray; + case Tegra::Shader::ImageType::Texture3D: + return ImageViewType::e3D; + case Tegra::Shader::ImageType::TextureBuffer: + return ImageViewType::Buffer; + } + UNREACHABLE(); + return ImageViewType::e2D; +} + +void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue, + ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { + for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { + const ImageViewId image_view_id = *image_view_id_ptr++; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); + } + for (const auto& entry : entries.samplers) { + for (size_t i = 0; i < entry.size; ++i) { + const VkSampler sampler = *sampler_ptr++; + const ImageViewId image_view_id = *image_view_id_ptr++; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); + 
update_descriptor_queue.AddSampledImage(handle, sampler); + } + } + for ([[maybe_unused]] const auto& entry : entries.storage_texels) { + const ImageViewId image_view_id = *image_view_id_ptr++; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); + } + for (const auto& entry : entries.images) { + // TODO: Mark as modified + const ImageViewId image_view_id = *image_view_id_ptr++; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); + update_descriptor_queue.AddImage(handle); + } +} + } // Anonymous namespace class BufferBindings final { @@ -290,7 +316,7 @@ public: private: // Some of these fields are intentionally left uninitialized to avoid initializing them twice. struct { - std::size_t num_buffers = 0; + size_t num_buffers = 0; std::array buffers; std::array offsets; std::array sizes; @@ -303,7 +329,7 @@ private: VkIndexType type; } index; - template + template void BindStatic(const VKDevice& device, VKScheduler& scheduler) const { if (device.IsExtExtendedDynamicStateSupported()) { if (index.buffer) { @@ -320,7 +346,7 @@ private: } } - template + template void BindStatic(VKScheduler& scheduler) const { static_assert(N <= Maxwell::NumVertexArrays); if constexpr (N == 0) { @@ -385,20 +411,23 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_, const VKDevice& device_, VKMemoryManager& memory_manager_, StateTracker& state_tracker_, VKScheduler& scheduler_) - : RasterizerAccelerated(cpu_memory_), gpu(gpu_), gpu_memory(gpu_memory_), - maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_), - device(device_), memory_manager(memory_manager_), state_tracker(state_tracker_), - scheduler(scheduler_), staging_pool(device, memory_manager, scheduler), - 
descriptor_pool(device, scheduler_), update_descriptor_queue(device, scheduler), - renderpass_cache(device), + : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, + gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, + screen_info{screen_info_}, device{device_}, memory_manager{memory_manager_}, + state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), + staging_pool(device, memory_manager, scheduler), descriptor_pool(device, scheduler), + update_descriptor_queue(device, scheduler), + blit_image(device, scheduler, state_tracker, descriptor_pool), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), - texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool), + texture_cache_runtime{device, scheduler, memory_manager, staging_pool, blit_image}, + texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, - descriptor_pool, update_descriptor_queue, renderpass_cache), - buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, staging_pool), - sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler), + descriptor_pool, update_descriptor_queue), + buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, stream_buffer, + staging_pool), + query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { @@ -427,9 +456,10 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { const DrawParameters 
draw_params = SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); - update_descriptor_queue.Acquire(); - sampled_views.clear(); - image_views.clear(); + auto lock = texture_cache.AcquireLock(); + texture_cache.SynchronizeGraphicsDescriptors(); + + texture_cache.UpdateRenderTargets(false); const auto shaders = pipeline_cache.GetShaders(); key.shaders = GetShaderAddresses(shaders); @@ -437,30 +467,24 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { buffer_cache.Unmap(); - const Texceptions texceptions = UpdateAttachments(false); - SetupImageTransitions(texceptions, color_attachments, zeta_attachment); - - key.renderpass_params = GetRenderPassParams(texceptions); - key.padding = 0; + const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); + key.renderpass = framebuffer->RenderPass(); - auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); + auto* const pipeline = + pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders); if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { // Async graphics pipeline was not ready. 
return; } - scheduler.BindGraphicsPipeline(pipeline->GetHandle()); - - const auto renderpass = pipeline->GetRenderPass(); - const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); - scheduler.RequestRenderpass(renderpass, framebuffer, render_area); - - UpdateDynamicStates(); - buffer_bindings.Bind(device, scheduler); BeginTransformFeedback(); + scheduler.RequestRenderpass(framebuffer); + scheduler.BindGraphicsPipeline(pipeline->GetHandle()); + UpdateDynamicStates(); + const auto pipeline_layout = pipeline->GetLayout(); const auto descriptor_set = pipeline->CommitDescriptorSet(); scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { @@ -481,9 +505,6 @@ void RasterizerVulkan::Clear() { return; } - sampled_views.clear(); - image_views.clear(); - query_cache.UpdateCounters(); const auto& regs = maxwell3d.regs; @@ -495,20 +516,24 @@ void RasterizerVulkan::Clear() { return; } - [[maybe_unused]] const auto texceptions = UpdateAttachments(true); - DEBUG_ASSERT(texceptions.none()); - SetupImageTransitions(0, color_attachments, zeta_attachment); + auto lock = texture_cache.AcquireLock(); + texture_cache.UpdateRenderTargets(true); + const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); + const VkExtent2D render_area = framebuffer->RenderArea(); + scheduler.RequestRenderpass(framebuffer); - const VkRenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); - const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); - scheduler.RequestRenderpass(renderpass, framebuffer, render_area); - - VkClearRect clear_rect; - clear_rect.baseArrayLayer = regs.clear_buffers.layer; - clear_rect.layerCount = 1; - clear_rect.rect = GetScissorState(regs, 0); - clear_rect.rect.extent.width = std::min(clear_rect.rect.extent.width, render_area.width); - clear_rect.rect.extent.height = std::min(clear_rect.rect.extent.height, render_area.height); + VkClearRect clear_rect{ + .rect = 
GetScissorState(regs, 0), + .baseArrayLayer = regs.clear_buffers.layer, + .layerCount = 1, + }; + if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) { + return; + } + clear_rect.rect.extent = VkExtent2D{ + .width = std::min(clear_rect.rect.extent.width, render_area.width), + .height = std::min(clear_rect.rect.extent.height, render_area.height), + }; if (use_color) { VkClearValue clear_value; @@ -549,9 +574,6 @@ void RasterizerVulkan::Clear() { void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { MICROPROFILE_SCOPE(Vulkan_Compute); - update_descriptor_queue.Acquire(); - sampled_views.clear(); - image_views.clear(); query_cache.UpdateCounters(); @@ -570,29 +592,43 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { // Compute dispatches can't be executed inside a renderpass scheduler.RequestOutsideRenderPassOperationContext(); - buffer_cache.Map(CalculateComputeStreamBufferSize()); + image_view_indices.clear(); + sampler_handles.clear(); + + auto lock = texture_cache.AcquireLock(); + texture_cache.SynchronizeComputeDescriptors(); const auto& entries = pipeline.GetEntries(); - SetupComputeConstBuffers(entries); - SetupComputeGlobalBuffers(entries); SetupComputeUniformTexels(entries); SetupComputeTextures(entries); SetupComputeStorageTexels(entries); SetupComputeImages(entries); - buffer_cache.Unmap(); + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); - TransitionImages(sampled_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT); - TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); + buffer_cache.Map(CalculateComputeStreamBufferSize()); + update_descriptor_queue.Acquire(); + + SetupComputeConstBuffers(entries); + SetupComputeGlobalBuffers(entries); + + ImageViewId* image_view_id_ptr = image_view_ids.data(); + VkSampler* 
sampler_ptr = sampler_handles.data(); + PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, + sampler_ptr); + + buffer_cache.Unmap(); + + const VkPipeline pipeline_handle = pipeline.GetHandle(); + const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); + const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, - grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), - layout = pipeline.GetLayout(), - descriptor_set = pipeline.CommitDescriptorSet()](vk::CommandBuffer cmdbuf) { + grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, + descriptor_set](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, DESCRIPTOR_SET, + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, DESCRIPTOR_SET, descriptor_set, {}); cmdbuf.Dispatch(grid_x, grid_y, grid_z); }); @@ -613,7 +649,10 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.FlushRegion(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.DownloadMemory(addr, size); + } buffer_cache.FlushRegion(addr, size); query_cache.FlushRegion(addr, size); } @@ -622,14 +661,18 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { if (!Settings::IsGPULevelHigh()) { return buffer_cache.MustFlushRegion(addr, size); } - return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); + return texture_cache.IsRegionGpuModified(addr, size) || + buffer_cache.MustFlushRegion(addr, size); } void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.InvalidateRegion(addr, size); + { + auto lock = texture_cache.AcquireLock(); + 
texture_cache.WriteMemory(addr, size); + } pipeline_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size); @@ -639,17 +682,28 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.OnCPUWrite(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.WriteMemory(addr, size); + } pipeline_cache.OnCPUWrite(addr, size); buffer_cache.OnCPUWrite(addr, size); } void RasterizerVulkan::SyncGuestHost() { - texture_cache.SyncGuestHost(); buffer_cache.SyncGuestHost(); pipeline_cache.SyncGuestHost(); } +void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { + { + auto lock = texture_cache.AcquireLock(); + texture_cache.UnmapMemory(addr, size); + } + buffer_cache.OnCPUWrite(addr, size); + pipeline_cache.OnCPUWrite(addr, size); +} + void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write(addr, value); @@ -700,6 +754,14 @@ void RasterizerVulkan::WaitForIdle() { }); } +void RasterizerVulkan::FragmentBarrier() { + // We already put barriers when a render pass finishes +} + +void RasterizerVulkan::TiledCacheBarrier() { + // TODO: Implementing tiled barriers requires rewriting a good chunk of the Vulkan backend +} + void RasterizerVulkan::FlushCommands() { if (draw_counter > 0) { draw_counter = 0; @@ -710,14 +772,20 @@ void RasterizerVulkan::FlushCommands() { void RasterizerVulkan::TickFrame() { draw_counter = 0; update_descriptor_queue.TickFrame(); + fence_manager.TickFrame(); buffer_cache.TickFrame(); staging_pool.TickFrame(); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.TickFrame(); + } } -bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, +bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Surface& 
dst, const Tegra::Engines::Fermi2D::Config& copy_config) { - texture_cache.DoFermiCopy(src, dst, copy_config); + auto lock = texture_cache.AcquireLock(); + texture_cache.BlitImage(dst, src, copy_config); return true; } @@ -727,20 +795,16 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, return false; } - const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; - if (!surface) { + auto lock = texture_cache.AcquireLock(); + ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); + if (!image_view) { return false; } - // Verify that the cached surface is the same size and format as the requested framebuffer - const auto& params{surface->GetSurfaceParams()}; - ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); - ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); - - screen_info.image = &surface->GetImage(); - screen_info.width = params.width; - screen_info.height = params.height; - screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion; + screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); + screen_info.width = image_view->size.width; + screen_info.height = image_view->size.height; + screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); return true; } @@ -765,103 +829,6 @@ void RasterizerVulkan::FlushWork() { draw_counter = 0; } -RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) { - MICROPROFILE_SCOPE(Vulkan_RenderTargets); - - const auto& regs = maxwell3d.regs; - auto& dirty = maxwell3d.dirty.flags; - const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; - dirty[VideoCommon::Dirty::RenderTargets] = false; - - texture_cache.GuardRenderTargets(true); - - Texceptions texceptions; - for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - if (update_rendertargets) { - const bool preserve_contents = 
HasToPreserveColorContents(is_clear, regs); - color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents); - } - if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { - texceptions[rt] = true; - } - } - - if (update_rendertargets) { - const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs); - zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents); - } - if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { - texceptions[ZETA_TEXCEPTION_INDEX] = true; - } - - texture_cache.GuardRenderTargets(false); - - return texceptions; -} - -bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) { - bool overlap = false; - for (auto& [view, layout] : sampled_views) { - if (!attachment.IsSameSurface(*view)) { - continue; - } - overlap = true; - *layout = VK_IMAGE_LAYOUT_GENERAL; - } - return overlap; -} - -std::tuple RasterizerVulkan::ConfigureFramebuffers( - VkRenderPass renderpass) { - FramebufferCacheKey key{ - .renderpass = renderpass, - .width = std::numeric_limits::max(), - .height = std::numeric_limits::max(), - .layers = std::numeric_limits::max(), - .views = {}, - }; - - const auto try_push = [&key](const View& view) { - if (!view) { - return false; - } - key.views.push_back(view->GetAttachment()); - key.width = std::min(key.width, view->GetWidth()); - key.height = std::min(key.height, view->GetHeight()); - key.layers = std::min(key.layers, view->GetNumLayers()); - return true; - }; - - const auto& regs = maxwell3d.regs; - const std::size_t num_attachments = static_cast(regs.rt_control.count); - for (std::size_t index = 0; index < num_attachments; ++index) { - if (try_push(color_attachments[index])) { - texture_cache.MarkColorBufferInUse(index); - } - } - if (try_push(zeta_attachment)) { - texture_cache.MarkDepthBufferInUse(); - } - - const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); - auto& framebuffer = fbentry->second; - 
if (is_cache_miss) { - framebuffer = device.GetLogical().CreateFramebuffer({ - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .renderPass = key.renderpass, - .attachmentCount = static_cast(key.views.size()), - .pAttachments = key.views.data(), - .width = key.width, - .height = key.height, - .layers = key.layers, - }); - } - - return {*framebuffer, VkExtent2D{key.width, key.height}}; -} - RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, bool is_indexed, @@ -885,50 +852,37 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt void RasterizerVulkan::SetupShaderDescriptors( const std::array& shaders) { - texture_cache.GuardSamplers(true); - - for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - // Skip VertexA stage + image_view_indices.clear(); + sampler_handles.clear(); + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { Shader* const shader = shaders[stage + 1]; if (!shader) { continue; } const auto& entries = shader->GetEntries(); - SetupGraphicsConstBuffers(entries, stage); - SetupGraphicsGlobalBuffers(entries, stage); SetupGraphicsUniformTexels(entries, stage); SetupGraphicsTextures(entries, stage); SetupGraphicsStorageTexels(entries, stage); SetupGraphicsImages(entries, stage); } - texture_cache.GuardSamplers(false); -} + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); -void RasterizerVulkan::SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color, - const ZetaAttachment& zeta) { - TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT); - TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); + update_descriptor_queue.Acquire(); - for 
(std::size_t rt = 0; rt < color.size(); ++rt) { - const auto color_attachment = color[rt]; - if (color_attachment == nullptr) { + ImageViewId* image_view_id_ptr = image_view_ids.data(); + VkSampler* sampler_ptr = sampler_handles.data(); + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + // Skip VertexA stage + Shader* const shader = shaders[stage + 1]; + if (!shader) { continue; } - const auto image_layout = - texceptions[rt] ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - color_attachment->Transition(image_layout, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); - } - - if (zeta != nullptr) { - const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] - ? VK_IMAGE_LAYOUT_GENERAL - : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - zeta->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); + const auto& entries = shader->GetEntries(); + SetupGraphicsConstBuffers(entries, stage); + SetupGraphicsGlobalBuffers(entries, stage); + PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, + sampler_ptr); } } @@ -1000,7 +954,7 @@ void RasterizerVulkan::EndTransformFeedback() { void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { const auto& regs = maxwell3d.regs; - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { const auto& vertex_array = regs.vertex_array[index]; if (!vertex_array.IsEnabled()) { continue; @@ -1009,7 +963,7 @@ void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; ASSERT(end >= start); - const std::size_t size = end - start; + const size_t size = end - start; if (size == 0) { 
buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); continue; @@ -1070,7 +1024,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar } } -void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_ConstBuffers); const auto& shader_stage = maxwell3d.state.shader_stages[stage]; for (const auto& entry : entries.const_buffers) { @@ -1078,7 +1032,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s } } -void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); const auto& cbufs{maxwell3d.state.shader_stages[stage]}; @@ -1088,37 +1042,49 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, } } -void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& regs = maxwell3d.regs; + const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.uniform_texels) { - const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; - SetupUniformTexels(image, entry); + const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); + image_view_indices.push_back(handle.image); } } -void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& regs = maxwell3d.regs; + const bool 
via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.samplers) { - for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(maxwell3d, entry, stage, i); - SetupTexture(texture, entry); + for (size_t index = 0; index < entry.size; ++index) { + const TextureHandle handle = + GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); } } } -void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& regs = maxwell3d.regs; + const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.storage_texels) { - const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; - SetupStorageTexel(image, entry); + const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); + image_view_indices.push_back(handle.image); } } -void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_Images); + const auto& regs = maxwell3d.regs; + const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.images) { - const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic; - SetupImage(tic, entry); + const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); + image_view_indices.push_back(handle.image); } } @@ -1128,11 +1094,12 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { for 
(const auto& entry : entries.const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); - Tegra::Engines::ConstBufferInfo buffer; - buffer.address = config.Address(); - buffer.size = config.size; - buffer.enabled = mask[entry.GetIndex()]; - SetupConstBuffer(entry, buffer); + const Tegra::Engines::ConstBufferInfo info{ + .address = config.Address(), + .size = config.size, + .enabled = mask[entry.GetIndex()], + }; + SetupConstBuffer(entry, info); } } @@ -1147,35 +1114,46 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.uniform_texels) { - const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; - SetupUniformTexels(image, entry); + const TextureHandle handle = + GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); + image_view_indices.push_back(handle.image); } } void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.samplers) { - for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i); - SetupTexture(texture, entry); + for (size_t index = 0; index < entry.size; ++index) { + const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, + COMPUTE_SHADER_INDEX, index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); } } } void 
RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.storage_texels) { - const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; - SetupStorageTexel(image, entry); + const TextureHandle handle = + GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); + image_view_indices.push_back(handle.image); } } void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Images); + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.images) { - const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; - SetupImage(tic, entry); + const TextureHandle handle = + GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); + image_view_indices.push_back(handle.image); } } @@ -1186,14 +1164,12 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); return; } - // Align the size to avoid bad std140 interactions - const std::size_t size = - Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); + const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); ASSERT(size <= MaxConstbufferSize); - const auto info = - buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); + const u64 alignment = device.GetUniformBufferAlignment(); + const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment); update_descriptor_queue.AddBuffer(info.handle, info.offset, size); } @@ -1206,7 +1182,7 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd // because Vulkan doesn't like empty buffers. 
// Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the // default buffer. - static constexpr std::size_t dummy_size = 4; + static constexpr size_t dummy_size = 4; const auto info = buffer_cache.GetEmptyBuffer(dummy_size); update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); return; @@ -1217,55 +1193,6 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd update_descriptor_queue.AddBuffer(info.handle, info.offset, size); } -void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, - const UniformTexelEntry& entry) { - const auto view = texture_cache.GetTextureSurface(tic, entry); - ASSERT(view->IsBufferView()); - - update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); -} - -void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture, - const SamplerEntry& entry) { - auto view = texture_cache.GetTextureSurface(texture.tic, entry); - ASSERT(!view->IsBufferView()); - - const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source, - texture.tic.z_source, texture.tic.w_source); - const auto sampler = sampler_cache.GetSampler(texture.tsc); - update_descriptor_queue.AddSampledImage(sampler, image_view); - - VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); - *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - sampled_views.push_back(ImageView{std::move(view), image_layout}); -} - -void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic, - const StorageTexelEntry& entry) { - const auto view = texture_cache.GetImageSurface(tic, entry); - ASSERT(view->IsBufferView()); - - update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); -} - -void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { - auto view = texture_cache.GetImageSurface(tic, entry); - - if (entry.is_written) { - 
view->MarkAsModified(texture_cache.Tick()); - } - - UNIMPLEMENTED_IF(tic.IsBuffer()); - - const VkImageView image_view = - view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source); - update_descriptor_queue.AddImage(image_view); - - VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); - *image_layout = VK_IMAGE_LAYOUT_GENERAL; - image_views.push_back(ImageView{std::move(view), image_layout}); -} - void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchViewports()) { return; @@ -1457,8 +1384,8 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& }); } -std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { - std::size_t size = CalculateVertexArraysSize(); +size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { + size_t size = CalculateVertexArraysSize(); if (is_indexed) { size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); } @@ -1466,15 +1393,15 @@ std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) return size; } -std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { +size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { return Tegra::Engines::KeplerCompute::NumConstBuffers * (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); } -std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { +size_t RasterizerVulkan::CalculateVertexArraysSize() const { const auto& regs = maxwell3d.regs; - std::size_t size = 0; + size_t size = 0; for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { // This implementation assumes that all attributes are used in the shader. 
const GPUVAddr start{regs.vertex_array[index].StartAddress()}; @@ -1486,12 +1413,12 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { return size; } -std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { - return static_cast(maxwell3d.regs.index_array.count) * - static_cast(maxwell3d.regs.index_array.FormatSizeInBytes()); +size_t RasterizerVulkan::CalculateIndexBufferSize() const { + return static_cast(maxwell3d.regs.index_array.count) * + static_cast(maxwell3d.regs.index_array.FormatSizeInBytes()); } -std::size_t RasterizerVulkan::CalculateConstBufferSize( +size_t RasterizerVulkan::CalculateConstBufferSize( const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { if (entry.IsIndirect()) { // Buffer is accessed indirectly, so upload the entire thing @@ -1502,37 +1429,10 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize( } } -RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { - const auto& regs = maxwell3d.regs; - const std::size_t num_attachments = static_cast(regs.rt_control.count); - - RenderPassParams params; - params.color_formats = {}; - std::size_t color_texceptions = 0; - - std::size_t index = 0; - for (std::size_t rt = 0; rt < num_attachments; ++rt) { - const auto& rendertarget = regs.rt[rt]; - if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) { - continue; - } - params.color_formats[index] = static_cast(rendertarget.format); - color_texceptions |= (texceptions[rt] ? 1ULL : 0ULL) << index; - ++index; - } - params.num_color_attachments = static_cast(index); - params.texceptions = static_cast(color_texceptions); - - params.zeta_format = regs.zeta_enable ? 
static_cast(regs.zeta.format) : 0; - params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX]; - return params; -} - VkBuffer RasterizerVulkan::DefaultBuffer() { if (default_buffer) { return *default_buffer; } - default_buffer = device.GetLogical().CreateBuffer({ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 30ec58eb4..990f9e031 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -11,11 +11,11 @@ #include #include -#include #include "common/common_types.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" @@ -24,10 +24,9 @@ #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_sampler_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -49,60 +48,9 @@ namespace Vulkan { struct VKScreenInfo; -using ImageViewsPack = boost::container::static_vector; - -struct FramebufferCacheKey { - VkRenderPass renderpass{}; - u32 width = 0; - u32 height = 0; - u32 layers = 0; - ImageViewsPack views; - - std::size_t Hash() const noexcept { - std::size_t hash = 0; - boost::hash_combine(hash, static_cast(renderpass)); 
- for (const auto& view : views) { - boost::hash_combine(hash, static_cast(view)); - } - boost::hash_combine(hash, width); - boost::hash_combine(hash, height); - boost::hash_combine(hash, layers); - return hash; - } - - bool operator==(const FramebufferCacheKey& rhs) const noexcept { - return std::tie(renderpass, views, width, height, layers) == - std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers); - } - - bool operator!=(const FramebufferCacheKey& rhs) const noexcept { - return !operator==(rhs); - } -}; - -} // namespace Vulkan - -namespace std { - -template <> -struct hash { - std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace Vulkan { - class StateTracker; class BufferBindings; -struct ImageView { - View view; - VkImageLayout* layout = nullptr; -}; - class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { public: explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -123,15 +71,18 @@ public: void InvalidateRegion(VAddr addr, u64 size) override; void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; + void UnmapMemory(VAddr addr, u64 size) override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitForIdle() override; + void FragmentBarrier() override; + void TiledCacheBarrier() override; void FlushCommands() override; void TickFrame() override; - bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, + bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr 
framebuffer_addr, u32 pixel_stride) override; @@ -145,11 +96,17 @@ public: } /// Maximum supported size that a constbuffer can have in bytes. - static constexpr std::size_t MaxConstbufferSize = 0x10000; + static constexpr size_t MaxConstbufferSize = 0x10000; static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); private: + static constexpr size_t MAX_TEXTURES = 192; + static constexpr size_t MAX_IMAGES = 48; + static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; + + static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); + struct DrawParameters { void Draw(vk::CommandBuffer cmdbuf) const; @@ -160,23 +117,8 @@ private: bool is_indexed = 0; }; - using ColorAttachments = std::array; - using ZetaAttachment = View; - - using Texceptions = std::bitset; - - static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8; - static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); - void FlushWork(); - /// @brief Updates the currently bound attachments - /// @param is_clear True when the framebuffer is updated as a clear - /// @return Bitfield of attachments being used as sampled textures - Texceptions UpdateAttachments(bool is_clear); - - std::tuple ConfigureFramebuffers(VkRenderPass renderpass); - /// Setups geometry buffers and state. DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, bool is_indexed, bool is_instanced); @@ -184,17 +126,12 @@ private: /// Setup descriptors in the graphics pipeline. 
void SetupShaderDescriptors(const std::array& shaders); - void SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color, - const ZetaAttachment& zeta); - void UpdateDynamicStates(); void BeginTransformFeedback(); void EndTransformFeedback(); - bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); - void SetupVertexArrays(BufferBindings& buffer_bindings); void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); @@ -240,14 +177,6 @@ private: void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); - void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry); - - void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); - - void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry); - - void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); - void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); @@ -264,18 +193,16 @@ private: void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); - std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; - - std::size_t CalculateComputeStreamBufferSize() const; + size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; - std::size_t CalculateVertexArraysSize() const; + size_t CalculateComputeStreamBufferSize() const; - std::size_t CalculateIndexBufferSize() const; + size_t CalculateVertexArraysSize() const; - std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry, - const Tegra::Engines::ConstBufferInfo& buffer) const; + size_t CalculateIndexBufferSize() const; - RenderPassParams GetRenderPassParams(Texceptions texceptions) const; + size_t CalculateConstBufferSize(const 
ConstBufferEntry& entry, + const Tegra::Engines::ConstBufferInfo& buffer) const; VkBuffer DefaultBuffer(); @@ -290,18 +217,19 @@ private: StateTracker& state_tracker; VKScheduler& scheduler; + VKStreamBuffer stream_buffer; VKStagingBufferPool staging_pool; VKDescriptorPool descriptor_pool; VKUpdateDescriptorQueue update_descriptor_queue; - VKRenderPassCache renderpass_cache; + BlitImageHelper blit_image; QuadArrayPass quad_array_pass; QuadIndexedPass quad_indexed_pass; Uint8Pass uint8_pass; - VKTextureCache texture_cache; + TextureCacheRuntime texture_cache_runtime; + TextureCache texture_cache; VKPipelineCache pipeline_cache; VKBufferCache buffer_cache; - VKSamplerCache sampler_cache; VKQueryCache query_cache; VKFenceManager fence_manager; @@ -310,16 +238,11 @@ private: vk::Event wfi_event; VideoCommon::Shader::AsyncShaders async_shaders; - ColorAttachments color_attachments; - ZetaAttachment zeta_attachment; - - std::vector sampled_views; - std::vector image_views; + boost::container::static_vector image_view_indices; + std::array image_view_ids; + boost::container::static_vector sampler_handles; u32 draw_counter = 0; - - // TODO(Rodrigo): Invalidate on image destruction - std::unordered_map framebuffer_cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp deleted file mode 100644 index e812c7dd6..000000000 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include "common/cityhash.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -std::size_t RenderPassParams::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); - return static_cast(hash); -} - -bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept { - return std::memcmp(&rhs, this, sizeof *this) == 0; -} - -VKRenderPassCache::VKRenderPassCache(const VKDevice& device_) : device{device_} {} - -VKRenderPassCache::~VKRenderPassCache() = default; - -VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { - const auto [pair, is_cache_miss] = cache.try_emplace(params); - auto& entry = pair->second; - if (is_cache_miss) { - entry = CreateRenderPass(params); - } - return *entry; -} - -vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { - using namespace VideoCore::Surface; - const std::size_t num_attachments = static_cast(params.num_color_attachments); - - std::vector descriptors; - descriptors.reserve(num_attachments); - - std::vector color_references; - color_references.reserve(num_attachments); - - for (std::size_t rt = 0; rt < num_attachments; ++rt) { - const auto guest_format = static_cast(params.color_formats[rt]); - const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format); - const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format); - ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", - static_cast(pixel_format)); - - // TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed. - const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0 - ? 
VK_IMAGE_LAYOUT_GENERAL - : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - descriptors.push_back({ - .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, - .format = format.format, - .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, - .initialLayout = color_layout, - .finalLayout = color_layout, - }); - - color_references.push_back({ - .attachment = static_cast(rt), - .layout = color_layout, - }); - } - - VkAttachmentReference zeta_attachment_ref; - const bool has_zeta = params.zeta_format != 0; - if (has_zeta) { - const auto guest_format = static_cast(params.zeta_format); - const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format); - const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format); - ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", - static_cast(pixel_format)); - - const VkImageLayout zeta_layout = params.zeta_texception != 0 - ? 
VK_IMAGE_LAYOUT_GENERAL - : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - descriptors.push_back({ - .flags = 0, - .format = format.format, - .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = zeta_layout, - .finalLayout = zeta_layout, - }); - - zeta_attachment_ref = { - .attachment = static_cast(num_attachments), - .layout = zeta_layout, - }; - } - - const VkSubpassDescription subpass_description{ - .flags = 0, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .pInputAttachments = nullptr, - .colorAttachmentCount = static_cast(color_references.size()), - .pColorAttachments = color_references.data(), - .pResolveAttachments = nullptr, - .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr, - .preserveAttachmentCount = 0, - .pPreserveAttachments = nullptr, - }; - - VkAccessFlags access = 0; - VkPipelineStageFlags stage = 0; - if (!color_references.empty()) { - access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - } - - if (has_zeta) { - access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - } - - const VkSubpassDependency subpass_dependency{ - .srcSubpass = VK_SUBPASS_EXTERNAL, - .dstSubpass = 0, - .srcStageMask = stage, - .dstStageMask = stage, - .srcAccessMask = 0, - .dstAccessMask = access, - .dependencyFlags = 0, - }; - - return device.GetLogical().CreateRenderPass({ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .attachmentCount = static_cast(descriptors.size()), - .pAttachments = descriptors.data(), - .subpassCount = 1, - .pSubpasses = &subpass_description, - .dependencyCount = 1, - 
.pDependencies = &subpass_dependency, - }); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h deleted file mode 100644 index 652ecef7b..000000000 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include -#include - -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/surface.h" - -namespace Vulkan { - -class VKDevice; - -struct RenderPassParams { - std::array color_formats; - u8 num_color_attachments; - u8 texceptions; - - u8 zeta_format; - u8 zeta_texception; - - std::size_t Hash() const noexcept; - - bool operator==(const RenderPassParams& rhs) const noexcept; - - bool operator!=(const RenderPassParams& rhs) const noexcept { - return !operator==(rhs); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - -} // namespace Vulkan - -namespace std { - -template <> -struct hash { - std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace Vulkan { - -class VKRenderPassCache final { -public: - explicit VKRenderPassCache(const VKDevice& device_); - ~VKRenderPassCache(); - - VkRenderPass GetRenderPass(const RenderPassParams& params); - -private: - vk::RenderPass CreateRenderPass(const RenderPassParams& params) const; - - const VKDevice& device; - std::unordered_map cache; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp deleted file mode 100644 index b859691fa..000000000 --- 
a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_sampler_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/textures/texture.h" - -using Tegra::Texture::TextureMipmapFilter; - -namespace Vulkan { - -namespace { - -VkBorderColor ConvertBorderColor(std::array color) { - // TODO(Rodrigo): Manage integer border colors - if (color == std::array{0, 0, 0, 0}) { - return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - } else if (color == std::array{0, 0, 0, 1}) { - return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; - } else if (color == std::array{1, 1, 1, 1}) { - return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; - } - if (color[0] + color[1] + color[2] > 1.35f) { - // If color elements are brighter than roughly 0.5 average, use white border - return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; - } else if (color[3] > 0.5f) { - return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; - } else { - return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - } -} - -} // Anonymous namespace - -VKSamplerCache::VKSamplerCache(const VKDevice& device_) : device{device_} {} - -VKSamplerCache::~VKSamplerCache() = default; - -vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { - const bool arbitrary_borders = device.IsExtCustomBorderColorSupported(); - const std::array color = tsc.GetBorderColor(); - - VkSamplerCustomBorderColorCreateInfoEXT border{ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, - .pNext = nullptr, - .customBorderColor = {}, - .format = VK_FORMAT_UNDEFINED, - }; - std::memcpy(&border.customBorderColor, color.data(), sizeof(color)); - - return device.GetLogical().CreateSampler({ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .pNext = arbitrary_borders ? 
&border : nullptr, - .flags = 0, - .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), - .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), - .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), - .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), - .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), - .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), - .mipLodBias = tsc.GetLodBias(), - .anisotropyEnable = - static_cast(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE), - .maxAnisotropy = tsc.GetMaxAnisotropy(), - .compareEnable = tsc.depth_compare_enabled, - .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), - .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(), - .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(), - .borderColor = - arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), - .unnormalizedCoordinates = VK_FALSE, - }); -} - -VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const { - return *sampler; -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h deleted file mode 100644 index 3f22c4610..000000000 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/sampler_cache.h" -#include "video_core/textures/texture.h" - -namespace Vulkan { - -class VKDevice; - -class VKSamplerCache final : public VideoCommon::SamplerCache { -public: - explicit VKSamplerCache(const VKDevice& device_); - ~VKSamplerCache(); - -protected: - vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; - - VkSampler ToSamplerType(const vk::Sampler& sampler) const override; - -private: - const VKDevice& device; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 1a483dc71..c104c6fe3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -16,6 +16,7 @@ #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -96,38 +97,39 @@ void VKScheduler::DispatchWork() { AcquireNewChunk(); } -void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, - VkExtent2D render_area) { - if (renderpass == state.renderpass && framebuffer == state.framebuffer && +void VKScheduler::RequestRenderpass(const Framebuffer* framebuffer) { + const VkRenderPass renderpass = framebuffer->RenderPass(); + const VkFramebuffer framebuffer_handle = framebuffer->Handle(); + const VkExtent2D render_area = framebuffer->RenderArea(); + if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer && render_area.width == state.render_area.width && render_area.height == state.render_area.height) { return; } - const bool end_renderpass = state.renderpass != nullptr; + EndRenderPass(); state.renderpass = renderpass; - state.framebuffer = framebuffer; + 
state.framebuffer = framebuffer_handle; state.render_area = render_area; - const VkRenderPassBeginInfo renderpass_bi{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .pNext = nullptr, - .renderPass = renderpass, - .framebuffer = framebuffer, - .renderArea = - { - .offset = {.x = 0, .y = 0}, - .extent = render_area, - }, - .clearValueCount = 0, - .pClearValues = nullptr, - }; - - Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) { - if (end_renderpass) { - cmdbuf.EndRenderPass(); - } + Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) { + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = renderpass, + .framebuffer = framebuffer_handle, + .renderArea = + { + .offset = {.x = 0, .y = 0}, + .extent = render_area, + }, + .clearValueCount = 0, + .pClearValues = nullptr, + }; cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); }); + num_renderpass_images = framebuffer->NumImages(); + renderpass_images = framebuffer->Images(); + renderpass_image_ranges = framebuffer->ImageRanges(); } void VKScheduler::RequestOutsideRenderPassOperationContext() { @@ -241,8 +243,37 @@ void VKScheduler::EndRenderPass() { if (!state.renderpass) { return; } + Record([num_images = num_renderpass_images, images = renderpass_images, + ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) { + std::array barriers; + for (size_t i = 0; i < num_images; ++i) { + barriers[i] = VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + .oldLayout = 
VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = images[i], + .subresourceRange = ranges[i], + }; + } + cmdbuf.EndRenderPass(); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, + vk::Span(barriers.data(), num_images)); + }); state.renderpass = nullptr; - Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); }); + num_renderpass_images = 0; } void VKScheduler::AcquireNewChunk() { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 6d3a5da0b..0a36c8fad 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -17,6 +17,7 @@ namespace Vulkan { class CommandPool; +class Framebuffer; class MasterSemaphore; class StateTracker; class VKDevice; @@ -52,8 +53,7 @@ public: void DispatchWork(); /// Requests to begin a renderpass. - void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, - VkExtent2D render_area); + void RequestRenderpass(const Framebuffer* framebuffer); /// Requests the current executino context to be able to execute operations only allowed outside /// of a renderpass. @@ -62,6 +62,9 @@ public: /// Binds a pipeline to the current execution context. void BindGraphicsPipeline(VkPipeline pipeline); + /// Invalidates current command buffer state except for render passes + void InvalidateState(); + /// Assigns the query cache. 
void SetQueryCache(VKQueryCache& query_cache_) { query_cache = &query_cache_; @@ -170,8 +173,6 @@ private: void AllocateNewContext(); - void InvalidateState(); - void EndPendingOperations(); void EndRenderPass(); @@ -192,6 +193,11 @@ private: std::thread worker_thread; State state; + + u32 num_renderpass_images = 0; + std::array renderpass_images{}; + std::array renderpass_image_ranges{}; + Common::SPSCQueue> chunk_queue; Common::SPSCQueue> chunk_reserve; std::mutex mutex; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 72954d0e3..09d6f9f35 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -102,7 +102,7 @@ struct GenericVaryingDescription { bool is_scalar = false; }; -spv::Dim GetSamplerDim(const Sampler& sampler) { +spv::Dim GetSamplerDim(const SamplerEntry& sampler) { ASSERT(!sampler.is_buffer); switch (sampler.type) { case Tegra::Shader::TextureType::Texture1D: @@ -119,7 +119,7 @@ spv::Dim GetSamplerDim(const Sampler& sampler) { } } -std::pair GetImageDim(const Image& image) { +std::pair GetImageDim(const ImageEntry& image) { switch (image.type) { case Tegra::Shader::ImageType::Texture1D: return {spv::Dim::Dim1D, false}; @@ -980,7 +980,7 @@ private: return binding; } - void DeclareImage(const Image& image, u32& binding) { + void DeclareImage(const ImageEntry& image, u32& binding) { const auto [dim, arrayed] = GetImageDim(image); constexpr int depth = 0; constexpr bool ms = false; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index df1812514..ad91ad5de 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -21,10 +21,10 @@ class VKDevice; namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using UniformTexelEntry = 
VideoCommon::Shader::Sampler; -using SamplerEntry = VideoCommon::Shader::Sampler; -using StorageTexelEntry = VideoCommon::Shader::Image; -using ImageEntry = VideoCommon::Shader::Image; +using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; +using SamplerEntry = VideoCommon::Shader::SamplerEntry; +using StorageTexelEntry = VideoCommon::Shader::ImageEntry; +using ImageEntry = VideoCommon::Shader::ImageEntry; constexpr u32 DESCRIPTOR_SET = 0; diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index c1a218d76..38a0be7f2 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -13,18 +13,13 @@ namespace Vulkan { -vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { - // Avoid undefined behavior by copying to a staging allocation - ASSERT(code_size % sizeof(u32) == 0); - const auto data = std::make_unique(code_size / sizeof(u32)); - std::memcpy(data.get(), code_data, code_size); - +vk::ShaderModule BuildShader(const VKDevice& device, std::span code) { return device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .codeSize = code_size, - .pCode = data.get(), + .codeSize = static_cast(code.size_bytes()), + .pCode = code.data(), }); } diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index d1d3f3cae..dce34a140 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h @@ -4,6 +4,8 @@ #pragma once +#include + #include "common/common_types.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -11,6 +13,6 @@ namespace Vulkan { class VKDevice; -vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); +vk::ShaderModule BuildShader(const VKDevice& device, std::span code); } 
// namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 50164cc08..1779a2e30 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include #include @@ -29,21 +30,15 @@ using Table = Maxwell3D::DirtyState::Table; using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { + static constexpr std::array INVALIDATION_FLAGS{ + Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, + StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, + DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, + }; Flags flags{}; - flags[Viewports] = true; - flags[Scissors] = true; - flags[DepthBias] = true; - flags[BlendConstants] = true; - flags[DepthBounds] = true; - flags[StencilProperties] = true; - flags[CullMode] = true; - flags[DepthBoundsEnable] = true; - flags[DepthTestEnable] = true; - flags[DepthWriteEnable] = true; - flags[DepthCompareOp] = true; - flags[FrontFace] = true; - flags[StencilOp] = true; - flags[StencilTestEnable] = true; + for (const int flag : INVALIDATION_FLAGS) { + flags[flag] = true; + } return flags; } diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 1de789e57..c335d2bdf 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -52,6 +52,14 @@ public: current_topology = INVALID_TOPOLOGY; } + void InvalidateViewports() { + flags[Dirty::Viewports] = true; + } + + void InvalidateScissors() { + flags[Dirty::Scissors] = true; + } + bool TouchViewports() { return Exchange(Dirty::Viewports, false); } diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 1b59612b9..419cb154d 
100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -19,6 +19,10 @@ namespace Vulkan { namespace { +constexpr VkBufferUsageFlags BUFFER_USAGE = + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; @@ -56,17 +60,16 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties, } // Anonymous namespace -VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_, - VkBufferUsageFlags usage) +VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_) : device{device_}, scheduler{scheduler_} { - CreateBuffers(usage); + CreateBuffers(); ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); } VKStreamBuffer::~VKStreamBuffer() = default; -std::tuple VKStreamBuffer::Map(u64 size, u64 alignment) { +std::pair VKStreamBuffer::Map(u64 size, u64 alignment) { ASSERT(size <= stream_buffer_size); mapped_size = size; @@ -76,7 +79,6 @@ std::tuple VKStreamBuffer::Map(u64 size, u64 alignment) { WaitPendingOperations(offset); - bool invalidated = false; if (offset + size > stream_buffer_size) { // The buffer would overflow, save the amount of used watches and reset the state. invalidation_mark = current_watch_cursor; @@ -90,11 +92,9 @@ std::tuple VKStreamBuffer::Map(u64 size, u64 alignment) { // Ensure that we don't wait for uncommitted fences. 
scheduler.Flush(); - - invalidated = true; } - return {memory.Map(offset, size), offset, invalidated}; + return std::make_pair(memory.Map(offset, size), offset); } void VKStreamBuffer::Unmap(u64 size) { @@ -113,7 +113,7 @@ void VKStreamBuffer::Unmap(u64 size) { watch.tick = scheduler.CurrentTick(); } -void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { +void VKStreamBuffer::CreateBuffers() { const auto memory_properties = device.GetPhysical().GetMemoryProperties(); const u32 preferred_type = GetMemoryType(memory_properties); const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; @@ -127,7 +127,7 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { .pNext = nullptr, .flags = 0, .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), - .usage = usage, + .usage = BUFFER_USAGE, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 5e15ad78f..1428f77bf 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -5,7 +5,7 @@ #pragma once #include -#include +#include #include #include "common/common_types.h" @@ -19,17 +19,15 @@ class VKScheduler; class VKStreamBuffer final { public: - explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, - VkBufferUsageFlags usage); + explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler); ~VKStreamBuffer(); /** * Reserves a region of memory from the stream buffer. * @param size Size to reserve. - * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer - * offset and a boolean that's true when buffer has been invalidated. 
+ * @returns A pair of a raw memory pointer (with offset added), and the buffer offset */ - std::tuple Map(u64 size, u64 alignment); + std::pair Map(u64 size, u64 alignment); /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. void Unmap(u64 size); @@ -49,7 +47,7 @@ private: }; /// Creates Vulkan buffer handles committing the required the required memory. - void CreateBuffers(VkBufferUsageFlags usage); + void CreateBuffers(); /// Increases the amount of watches available. void ReserveWatches(std::vector& watches, std::size_t grow_size); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index ae2e3322c..261808391 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -4,614 +4,1103 @@ #include #include -#include -#include -#include -#include +#include #include -#include "common/assert.h" -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/morton.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/surface.h" namespace Vulkan { -using VideoCore::MortonSwizzle; -using VideoCore::MortonSwizzleMode; - +using Tegra::Engines::Fermi2D; using Tegra::Texture::SwizzleSource; -using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget; +using Tegra::Texture::TextureMipmapFilter; +using 
VideoCommon::BufferImageCopy; +using VideoCommon::ImageInfo; +using VideoCommon::ImageType; +using VideoCommon::SubresourceRange; +using VideoCore::Surface::IsPixelFormatASTC; namespace { -VkImageType SurfaceTargetToImage(SurfaceTarget target) { - switch (target) { - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture1DArray: +constexpr std::array ATTACHMENT_REFERENCES{ + VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, +}; + +constexpr VkBorderColor ConvertBorderColor(const std::array& color) { + if (color == std::array{0, 0, 0, 0}) { + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + } else if (color == std::array{0, 0, 0, 1}) { + return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + } else if (color == std::array{1, 1, 1, 1}) { + return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + } + if (color[0] + color[1] + color[2] > 1.35f) { + // If color elements are brighter than roughly 0.5 average, use white border + return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + } else if (color[3] > 0.5f) { + return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + } else { + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + } +} + +[[nodiscard]] VkImageType ConvertImageType(const ImageType type) { + switch (type) { + case ImageType::e1D: return VK_IMAGE_TYPE_1D; - case SurfaceTarget::Texture2D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: + case ImageType::e2D: + case ImageType::Linear: return VK_IMAGE_TYPE_2D; - case SurfaceTarget::Texture3D: + case ImageType::e3D: return VK_IMAGE_TYPE_3D; - case 
SurfaceTarget::TextureBuffer: - UNREACHABLE(); - return {}; + case ImageType::Buffer: + break; } - UNREACHABLE_MSG("Unknown texture target={}", target); + UNREACHABLE_MSG("Invalid image type={}", type); return {}; } -VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { - if (pixel_format < PixelFormat::MaxColorFormat) { - return VK_IMAGE_ASPECT_COLOR_BIT; - } else if (pixel_format < PixelFormat::MaxDepthFormat) { - return VK_IMAGE_ASPECT_DEPTH_BIT; - } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { - return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - } else { - UNREACHABLE_MSG("Invalid pixel format={}", pixel_format); - return VK_IMAGE_ASPECT_COLOR_BIT; +[[nodiscard]] VkSampleCountFlagBits ConvertSampleCount(u32 num_samples) { + switch (num_samples) { + case 1: + return VK_SAMPLE_COUNT_1_BIT; + case 2: + return VK_SAMPLE_COUNT_2_BIT; + case 4: + return VK_SAMPLE_COUNT_4_BIT; + case 8: + return VK_SAMPLE_COUNT_8_BIT; + case 16: + return VK_SAMPLE_COUNT_16_BIT; + default: + UNREACHABLE_MSG("Invalid number of samples={}", num_samples); + return VK_SAMPLE_COUNT_1_BIT; } } -VkImageViewType GetImageViewType(SurfaceTarget target) { - switch (target) { - case SurfaceTarget::Texture1D: - return VK_IMAGE_VIEW_TYPE_1D; - case SurfaceTarget::Texture2D: - return VK_IMAGE_VIEW_TYPE_2D; - case SurfaceTarget::Texture3D: - return VK_IMAGE_VIEW_TYPE_3D; - case SurfaceTarget::Texture1DArray: - return VK_IMAGE_VIEW_TYPE_1D_ARRAY; - case SurfaceTarget::Texture2DArray: - return VK_IMAGE_VIEW_TYPE_2D_ARRAY; - case SurfaceTarget::TextureCubemap: - return VK_IMAGE_VIEW_TYPE_CUBE; - case SurfaceTarget::TextureCubeArray: - return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; - case SurfaceTarget::TextureBuffer: - break; +[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const VKDevice& device, const ImageInfo& info) { + const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, info.format); + VkImageCreateFlags flags = 
VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + if (info.type == ImageType::e2D && info.resources.layers >= 6 && + info.size.width == info.size.height) { + flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; } - UNREACHABLE(); - return {}; -} - -vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, - std::size_t host_memory_size) { - // TODO(Rodrigo): Move texture buffer creation to the buffer cache - return device.GetLogical().CreateBuffer({ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + if (info.type == ImageType::e3D) { + flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; + } + VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; + if (format_info.attachable) { + switch (VideoCore::Surface::GetFormatType(info.format)) { + case VideoCore::Surface::SurfaceType::ColorTexture: + usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + break; + case VideoCore::Surface::SurfaceType::Depth: + case VideoCore::Surface::SurfaceType::DepthStencil: + usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + break; + default: + UNREACHABLE_MSG("Invalid surface type"); + } + } + if (format_info.storage) { + usage |= VK_IMAGE_USAGE_STORAGE_BIT; + } + const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples); + return VkImageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, - .flags = 0, - .size = static_cast(host_memory_size), - .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .flags = flags, + .imageType = ConvertImageType(info.type), + .format = format_info.format, + .extent = + { + .width = info.size.width >> samples_x, + .height = info.size.height >> samples_y, + .depth = info.size.depth, + }, + .mipLevels = static_cast(info.resources.levels), + .arrayLayers = static_cast(info.resources.layers), + .samples = 
ConvertSampleCount(info.num_samples), + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = usage, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); -} - -VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, - const SurfaceParams& params, VkBuffer buffer, - std::size_t host_memory_size) { - ASSERT(params.IsBuffer()); - - return { - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .buffer = buffer, - .format = - MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format, - .offset = 0, - .range = static_cast(host_memory_size), + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }; } -VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { - ASSERT(!params.IsBuffer()); - - const auto [format, attachable, storage] = - MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); +[[nodiscard]] vk::Image MakeImage(const VKDevice& device, const ImageInfo& info) { + if (info.type == ImageType::Buffer) { + return vk::Image{}; + } + return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); +} - VkImageCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, +[[nodiscard]] vk::Buffer MakeBuffer(const VKDevice& device, const ImageInfo& info) { + if (info.type != ImageType::Buffer) { + return vk::Buffer{}; + } + const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); + return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .imageType = SurfaceTargetToImage(params.target), - .format = format, - .extent = {}, - .mipLevels = params.num_levels, - .arrayLayers = static_cast(params.GetNumLayers()), - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - 
VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + .size = info.size.width * bytes_per_block, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - }; - if (attachable) { - ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT - : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - } - if (storage) { - ci.usage |= VK_IMAGE_USAGE_STORAGE_BIT; - } - - switch (params.target) { - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: - ci.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; - [[fallthrough]]; - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2D: - case SurfaceTarget::Texture2DArray: - ci.extent = {params.width, params.height, 1}; - break; - case SurfaceTarget::Texture3D: - ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; - ci.extent = {params.width, params.height, params.depth}; - break; - case SurfaceTarget::TextureBuffer: - UNREACHABLE(); - } - - return ci; + }); } -u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, - SwizzleSource w_source) { - return (static_cast(x_source) << 24) | (static_cast(y_source) << 16) | - (static_cast(z_source) << 8) | static_cast(w_source); +[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { + switch (VideoCore::Surface::GetFormatType(format)) { + case VideoCore::Surface::SurfaceType::ColorTexture: + return VK_IMAGE_ASPECT_COLOR_BIT; + case VideoCore::Surface::SurfaceType::Depth: + return VK_IMAGE_ASPECT_DEPTH_BIT; + case VideoCore::Surface::SurfaceType::DepthStencil: + return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + default: + UNREACHABLE_MSG("Invalid surface type"); + return VkImageAspectFlags{}; + } } -} // 
Anonymous namespace - -CachedSurface::CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, - VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, - GPUVAddr gpu_addr_, const SurfaceParams& params_) - : SurfaceBase{gpu_addr_, params_, device_.IsOptimalAstcSupported()}, device{device_}, - memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{staging_pool_} { - if (params.IsBuffer()) { - buffer = CreateBuffer(device, params, host_memory_size); - commit = memory_manager.Commit(buffer, false); - - const auto buffer_view_ci = - GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size); - format = buffer_view_ci.format; - - buffer_view = device.GetLogical().CreateBufferView(buffer_view_ci); - } else { - const auto image_ci = GenerateImageCreateInfo(device, params); - format = image_ci.format; - - image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format)); - commit = memory_manager.Commit(image->GetHandle(), false); +[[nodiscard]] VkImageAspectFlags ImageViewAspectMask(const VideoCommon::ImageViewInfo& info) { + if (info.IsRenderTarget()) { + return ImageAspectMask(info.format); } - - // TODO(Rodrigo): Move this to a virtual function. - u32 num_layers = 1; - if (params.is_layered || params.target == SurfaceTarget::Texture3D) { - num_layers = params.depth; + const bool is_first = info.Swizzle()[0] == SwizzleSource::R; + switch (info.format) { + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: + return is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; + case PixelFormat::S8_UINT_D24_UNORM: + return is_first ? 
VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; + case PixelFormat::D16_UNORM: + case PixelFormat::D32_FLOAT: + return VK_IMAGE_ASPECT_DEPTH_BIT; + default: + return VK_IMAGE_ASPECT_COLOR_BIT; } - main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels)); } -CachedSurface::~CachedSurface() = default; - -void CachedSurface::UploadTexture(const std::vector& staging_buffer) { - // To upload data we have to be outside of a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); +[[nodiscard]] VkAttachmentDescription AttachmentDescription(const VKDevice& device, + const ImageView* image_view) { + const auto pixel_format = image_view->format; + return VkAttachmentDescription{ + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format).format, + .samples = image_view->Samples(), + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }; +} - if (params.IsBuffer()) { - UploadBuffer(staging_buffer); - } else { - UploadImage(staging_buffer); +[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { + switch (swizzle) { + case SwizzleSource::Zero: + return VK_COMPONENT_SWIZZLE_ZERO; + case SwizzleSource::R: + return VK_COMPONENT_SWIZZLE_R; + case SwizzleSource::G: + return VK_COMPONENT_SWIZZLE_G; + case SwizzleSource::B: + return VK_COMPONENT_SWIZZLE_B; + case SwizzleSource::A: + return VK_COMPONENT_SWIZZLE_A; + case SwizzleSource::OneFloat: + case SwizzleSource::OneInt: + return VK_COMPONENT_SWIZZLE_ONE; } + UNREACHABLE_MSG("Invalid swizzle={}", swizzle); + return VK_COMPONENT_SWIZZLE_ZERO; } -void CachedSurface::DownloadTexture(std::vector& staging_buffer) { - UNIMPLEMENTED_IF(params.IsBuffer()); - - if (params.pixel_format == 
PixelFormat::A1B5G5R5_UNORM) { - LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); +[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { + switch (type) { + case VideoCommon::ImageViewType::e1D: + return VK_IMAGE_VIEW_TYPE_1D; + case VideoCommon::ImageViewType::e2D: + return VK_IMAGE_VIEW_TYPE_2D; + case VideoCommon::ImageViewType::Cube: + return VK_IMAGE_VIEW_TYPE_CUBE; + case VideoCommon::ImageViewType::e3D: + return VK_IMAGE_VIEW_TYPE_3D; + case VideoCommon::ImageViewType::e1DArray: + return VK_IMAGE_VIEW_TYPE_1D_ARRAY; + case VideoCommon::ImageViewType::e2DArray: + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case VideoCommon::ImageViewType::CubeArray: + return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; + case VideoCommon::ImageViewType::Rect: + LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); + return VK_IMAGE_VIEW_TYPE_2D; + case VideoCommon::ImageViewType::Buffer: + UNREACHABLE_MSG("Texture buffers can't be image views"); + return VK_IMAGE_VIEW_TYPE_1D; } + UNREACHABLE_MSG("Invalid image view type={}", type); + return VK_IMAGE_VIEW_TYPE_2D; +} - // We can't copy images to buffers inside a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); +[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers( + VideoCommon::SubresourceLayers subresource, VkImageAspectFlags aspect_mask) { + return VkImageSubresourceLayers{ + .aspectMask = aspect_mask, + .mipLevel = static_cast(subresource.base_level), + .baseArrayLayer = static_cast(subresource.base_layer), + .layerCount = static_cast(subresource.num_layers), + }; +} - FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); +[[nodiscard]] VkOffset3D MakeOffset3D(VideoCommon::Offset3D offset3d) { + return VkOffset3D{ + .x = offset3d.x, + .y = offset3d.y, + .z = offset3d.z, + }; +} - const auto& unused_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); - // TODO(Rodrigo): Do this in a 
single copy - for (u32 level = 0; level < params.num_levels; ++level) { - scheduler.Record([image = *image->GetHandle(), buffer = *unused_buffer.handle, - copy = GetBufferImageCopy(level)](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy); - }); - } - scheduler.Finish(); +[[nodiscard]] VkExtent3D MakeExtent3D(VideoCommon::Extent3D extent3d) { + return VkExtent3D{ + .width = static_cast(extent3d.width), + .height = static_cast(extent3d.height), + .depth = static_cast(extent3d.depth), + }; +} - // TODO(Rodrigo): Use an intern buffer for staging buffers and avoid this unnecessary memcpy. - std::memcpy(staging_buffer.data(), unused_buffer.commit->Map(host_memory_size), - host_memory_size); +[[nodiscard]] VkImageCopy MakeImageCopy(const VideoCommon::ImageCopy& copy, + VkImageAspectFlags aspect_mask) noexcept { + return VkImageCopy{ + .srcSubresource = MakeImageSubresourceLayers(copy.src_subresource, aspect_mask), + .srcOffset = MakeOffset3D(copy.src_offset), + .dstSubresource = MakeImageSubresourceLayers(copy.dst_subresource, aspect_mask), + .dstOffset = MakeOffset3D(copy.dst_offset), + .extent = MakeExtent3D(copy.extent), + }; } -void CachedSurface::DecorateSurfaceName() { - // TODO(Rodrigo): Add name decorations +[[nodiscard]] std::vector TransformBufferCopies( + std::span copies, size_t buffer_offset) { + std::vector result(copies.size()); + std::ranges::transform( + copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { + return VkBufferCopy{ + .srcOffset = static_cast(copy.src_offset + buffer_offset), + .dstOffset = static_cast(copy.dst_offset), + .size = static_cast(copy.size), + }; + }); + return result; } -View CachedSurface::CreateView(const ViewParams& view_params) { - // TODO(Rodrigo): Add name decorations - return views[view_params] = std::make_shared(device, *this, view_params); +[[nodiscard]] std::vector TransformBufferImageCopies( + std::span copies, size_t 
buffer_offset, VkImageAspectFlags aspect_mask) { + struct Maker { + VkBufferImageCopy operator()(const BufferImageCopy& copy) const { + return VkBufferImageCopy{ + .bufferOffset = copy.buffer_offset + buffer_offset, + .bufferRowLength = copy.buffer_row_length, + .bufferImageHeight = copy.buffer_image_height, + .imageSubresource = + { + .aspectMask = aspect_mask, + .mipLevel = static_cast(copy.image_subresource.base_level), + .baseArrayLayer = static_cast(copy.image_subresource.base_layer), + .layerCount = static_cast(copy.image_subresource.num_layers), + }, + .imageOffset = + { + .x = copy.image_offset.x, + .y = copy.image_offset.y, + .z = copy.image_offset.z, + }, + .imageExtent = + { + .width = copy.image_extent.width, + .height = copy.image_extent.height, + .depth = copy.image_extent.depth, + }, + }; + } + size_t buffer_offset; + VkImageAspectFlags aspect_mask; + }; + if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + std::vector result(copies.size() * 2); + std::ranges::transform(copies, result.begin(), + Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); + std::ranges::transform(copies, result.begin() + copies.size(), + Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); + return result; + } else { + std::vector result(copies.size()); + std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); + return result; + } } -void CachedSurface::UploadBuffer(const std::vector& staging_buffer) { - const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); - std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); +[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(VkImageAspectFlags aspect_mask, + const SubresourceRange& range) { + return VkImageSubresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = static_cast(range.base.level), + .levelCount = static_cast(range.extent.levels), + .baseArrayLayer = static_cast(range.base.layer), + 
.layerCount = static_cast(range.extent.layers), + }; +} - scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, - size = host_memory_size](vk::CommandBuffer cmdbuf) { - VkBufferCopy copy; - copy.srcOffset = 0; - copy.dstOffset = 0; - copy.size = size; - cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); +[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(const ImageView* image_view) { + SubresourceRange range = image_view->range; + if (True(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { + // Slice image views always affect a single layer, but their subresource range corresponds + // to the slice. Override the value to affect a single layer. + range.base.layer = 0; + range.extent.layers = 1; + } + return MakeSubresourceRange(ImageAspectMask(image_view->format), range); +} - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // They'll be ignored anyway - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = dst_buffer; - barrier.offset = 0; - barrier.size = size; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, - 0, {}, barrier, {}); - }); +[[nodiscard]] VkImageSubresourceLayers MakeSubresourceLayers(const ImageView* image_view) { + return VkImageSubresourceLayers{ + .aspectMask = ImageAspectMask(image_view->format), + .mipLevel = static_cast(image_view->range.base.level), + .baseArrayLayer = static_cast(image_view->range.base.layer), + .layerCount = static_cast(image_view->range.extent.layers), + }; } -void CachedSurface::UploadImage(const std::vector& staging_buffer) { - const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); - std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), 
host_memory_size); - - FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - for (u32 level = 0; level < params.num_levels; ++level) { - const VkBufferImageCopy copy = GetBufferImageCopy(level); - if (image->GetAspectMask() == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), - copy](vk::CommandBuffer cmdbuf) { - std::array copies = {copy, copy}; - copies[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - copies[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; - cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - copies); - }); - } else { - scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), - copy](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); - }); - } +[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) { + switch (value) { + case SwizzleSource::G: + return SwizzleSource::R; + default: + return value; } } -VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { - return { - .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted), - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource = +void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, + VkImageAspectFlags aspect_mask, bool is_initialized, + std::span copies) { + static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + const VkImageMemoryBarrier read_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = ACCESS_FLAGS, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = is_initialized ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = { - .aspectMask = image->GetAspectMask(), - .mipLevel = level, + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, .baseArrayLayer = 0, - .layerCount = static_cast(params.GetNumLayers()), + .layerCount = VK_REMAINING_ARRAY_LAYERS, }, - .imageOffset = {.x = 0, .y = 0, .z = 0}, - .imageExtent = + }; + const VkImageMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = ACCESS_FLAGS, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = { - .width = params.GetMipWidth(level), - .height = params.GetMipHeight(level), - .depth = params.target == SurfaceTarget::Texture3D ? 
params.GetMipDepth(level) : 1U, + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + read_barrier); + cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies); + // TODO: Move this to another API + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, + write_barrier); } -VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { - return {image->GetAspectMask(), 0, params.num_levels, 0, - static_cast(params.GetNumLayers())}; +[[nodiscard]] VkImageBlit MakeImageBlit(const std::array& dst_region, + const std::array& src_region, + const VkImageSubresourceLayers& dst_layers, + const VkImageSubresourceLayers& src_layers) { + return VkImageBlit{ + .srcSubresource = src_layers, + .srcOffsets = + { + { + .x = src_region[0].x, + .y = src_region[0].y, + .z = 0, + }, + { + .x = src_region[1].x, + .y = src_region[1].y, + .z = 1, + }, + }, + .dstSubresource = dst_layers, + .dstOffsets = + { + { + .x = dst_region[0].x, + .y = dst_region[0].y, + .z = 0, + }, + { + .x = dst_region[1].x, + .y = dst_region[1].y, + .z = 1, + }, + }, + }; } -CachedSurfaceView::CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_, - const ViewParams& view_params_) - : ViewBase{view_params_}, surface_params{surface_.GetSurfaceParams()}, - image{surface_.GetImageHandle()}, buffer_view{surface_.GetBufferViewHandle()}, - aspect_mask{surface_.GetAspectMask()}, device{device_}, surface{surface_}, - base_level{view_params_.base_level}, num_levels{view_params_.num_levels}, - image_view_type{image ? 
GetImageViewType(view_params_.target) : VK_IMAGE_VIEW_TYPE_1D} { - if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { - base_layer = 0; - num_layers = 1; - base_slice = view_params_.base_layer; - num_slices = view_params_.num_layers; - } else { - base_layer = view_params_.base_layer; - num_layers = view_params_.num_layers; - } +[[nodiscard]] VkImageResolve MakeImageResolve(const std::array& dst_region, + const std::array& src_region, + const VkImageSubresourceLayers& dst_layers, + const VkImageSubresourceLayers& src_layers) { + return VkImageResolve{ + .srcSubresource = src_layers, + .srcOffset = + { + .x = src_region[0].x, + .y = src_region[0].y, + .z = 0, + }, + .dstSubresource = dst_layers, + .dstOffset = + { + .x = dst_region[0].x, + .y = dst_region[0].y, + .z = 0, + }, + .extent = + { + .width = static_cast(dst_region[1].x - dst_region[0].x), + .height = static_cast(dst_region[1].y - dst_region[0].y), + .depth = 1, + }, + }; } -CachedSurfaceView::~CachedSurfaceView() = default; - -VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source, - SwizzleSource z_source, SwizzleSource w_source) { - const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); - if (last_image_view && last_swizzle == new_swizzle) { - return last_image_view; +struct RangedBarrierRange { + u32 min_mip = std::numeric_limits::max(); + u32 max_mip = std::numeric_limits::min(); + u32 min_layer = std::numeric_limits::max(); + u32 max_layer = std::numeric_limits::min(); + + void AddLayers(const VkImageSubresourceLayers& layers) { + min_mip = std::min(min_mip, layers.mipLevel); + max_mip = std::max(max_mip, layers.mipLevel + 1); + min_layer = std::min(min_layer, layers.baseArrayLayer); + max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount); } - last_swizzle = new_swizzle; - const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); - auto& image_view = entry->second; - if (!is_cache_miss) { - return 
last_image_view = *image_view; + VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept { + return VkImageSubresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = min_mip, + .levelCount = max_mip - min_mip, + .baseArrayLayer = min_layer, + .layerCount = max_layer - min_layer, + }; } +}; - std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), - MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)}; - if (surface_params.pixel_format == PixelFormat::A1B5G5R5_UNORM) { - // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. - std::swap(swizzle[0], swizzle[2]); - } +} // Anonymous namespace - // Games can sample depth or stencil values on textures. This is decided by the swizzle value on - // hardware. To emulate this on Vulkan we specify it in the aspect. - VkImageAspectFlags aspect = aspect_mask; - if (aspect == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); - const bool is_first = x_source == SwizzleSource::R; - switch (surface_params.pixel_format) { - case PixelFormat::D24_UNORM_S8_UINT: - case PixelFormat::D32_FLOAT_S8_UINT: - aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; - break; - case PixelFormat::S8_UINT_D24_UNORM: - aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - break; - default: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - UNIMPLEMENTED(); - } +void TextureCacheRuntime::Finish() { + scheduler.Finish(); +} - // Make sure we sample the first component - std::transform( - swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) { - return component == VK_COMPONENT_SWIZZLE_G ? 
VK_COMPONENT_SWIZZLE_R : component; - }); - } +ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { + const auto& buffer = staging_buffer_pool.GetUnusedBuffer(size, true); + return ImageBufferMap{ + .handle = *buffer.handle, + .map = buffer.commit->Map(size), + }; +} - if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { - ASSERT(base_slice == 0); - ASSERT(num_slices == surface_params.depth); +void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation) { + const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format); + const bool is_dst_msaa = dst.Samples() != VK_SAMPLE_COUNT_1_BIT; + const bool is_src_msaa = src.Samples() != VK_SAMPLE_COUNT_1_BIT; + ASSERT(aspect_mask == ImageAspectMask(dst.format)); + if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) { + blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter, + operation); + return; } - - image_view = device.GetLogical().CreateImageView({ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = surface.GetImageHandle(), - .viewType = image_view_type, - .format = surface.GetImage().GetFormat(), - .components = - { - .r = swizzle[0], - .g = swizzle[1], - .b = swizzle[2], - .a = swizzle[3], + if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + if (!device.IsBlitDepthStencilSupported()) { + UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa); + blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(), + dst_region, src_region, filter, operation); + return; + } + } + ASSERT(src.ImageFormat() == dst.ImageFormat()); + ASSERT(!(is_dst_msaa && !is_src_msaa)); + ASSERT(operation == Fermi2D::Operation::SrcCopy); + + const VkImage dst_image = dst.ImageHandle(); + const VkImage 
src_image = src.ImageHandle(); + const VkImageSubresourceLayers dst_layers = MakeSubresourceLayers(&dst); + const VkImageSubresourceLayers src_layers = MakeSubresourceLayers(&src); + const bool is_resolve = is_src_msaa && !is_dst_msaa; + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([filter, dst_region, src_region, dst_image, src_image, dst_layers, src_layers, + aspect_mask, is_resolve](vk::CommandBuffer cmdbuf) { + const std::array read_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, }, - .subresourceRange = - { - .aspectMask = aspect, - .baseMipLevel = base_level, - .levelCount = num_levels, - .baseArrayLayer = base_layer, - .layerCount = num_layers, + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + 
VkImageMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, nullptr, nullptr, read_barriers); + if (is_resolve) { + cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + MakeImageResolve(dst_region, src_region, dst_layers, src_layers)); + } else { + const bool is_linear = filter == Fermi2D::Filter::Bilinear; + const VkFilter vk_filter = is_linear ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST; + cmdbuf.BlitImage( + src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + MakeImageBlit(dst_region, src_region, dst_layers, src_layers), vk_filter); + } + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, write_barrier); }); - - return last_image_view = *image_view; } -VkImageView CachedSurfaceView::GetAttachment() { - if (render_target) { - return *render_target; +void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { + switch (dst_view.format) { + case PixelFormat::R16_UNORM: + if (src_view.format == PixelFormat::D16_UNORM) { + return blit_image_helper.ConvertD16ToR16(dst, src_view); + } + break; + case PixelFormat::R32_FLOAT: + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32ToR32(dst, src_view); + } + break; + case PixelFormat::D16_UNORM: + if (src_view.format == PixelFormat::R16_UNORM) { + return blit_image_helper.ConvertR16ToD16(dst, src_view); + } + break; + case PixelFormat::D32_FLOAT: + if (src_view.format == PixelFormat::R32_FLOAT) { + return blit_image_helper.ConvertR32ToD32(dst, src_view); + } + break; + default: + break; } + UNIMPLEMENTED_MSG("Unimplemented format copy from {} to {}", src_view.format, dst_view.format); +} - VkImageViewCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = surface.GetImageHandle(), - .viewType = VK_IMAGE_VIEW_TYPE_1D, - .format = surface.GetImage().GetFormat(), - .components = - { - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, +void TextureCacheRuntime::CopyImage(Image& dst, Image& src, + std::span copies) { + std::vector vk_copies(copies.size()); + const VkImageAspectFlags aspect_mask = dst.AspectMask(); + ASSERT(aspect_mask == src.AspectMask()); + + 
std::ranges::transform(copies, vk_copies.begin(), [aspect_mask](const auto& copy) { + return MakeImageCopy(copy, aspect_mask); + }); + const VkImage dst_image = dst.Handle(); + const VkImage src_image = src.Handle(); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([dst_image, src_image, aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { + RangedBarrierRange dst_range; + RangedBarrierRange src_range; + for (const VkImageCopy& copy : vk_copies) { + dst_range.AddLayers(copy.dstSubresource); + src_range.AddLayers(copy.srcSubresource); + } + const std::array read_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = src_range.SubresourceRange(aspect_mask), }, - .subresourceRange = - { - .aspectMask = aspect_mask, - .baseMipLevel = base_level, - .levelCount = num_levels, - .baseArrayLayer = 0, - .layerCount = 0, + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image 
= dst_image, + .subresourceRange = dst_range.SubresourceRange(aspect_mask), }, - }; - if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { - ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; - ci.subresourceRange.baseArrayLayer = base_slice; - ci.subresourceRange.layerCount = num_slices; + }; + const VkImageMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = dst_range.SubresourceRange(aspect_mask), + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, {}, {}, read_barriers); + cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, write_barrier); + }); +} + +Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, + VAddr cpu_addr_) + : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, + image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), + aspect_mask(ImageAspectMask(info.format)) { + if (image) { + commit = runtime.memory_manager.Commit(image, false); } else { - ci.viewType = image_view_type; - ci.subresourceRange.baseArrayLayer = base_layer; - ci.subresourceRange.layerCount = num_layers; + commit = 
runtime.memory_manager.Commit(buffer, false); + } + if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { + flags |= VideoCommon::ImageFlagBits::Converted; + } + if (runtime.device.HasDebuggingToolAttached()) { + if (image) { + image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); + } else { + buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); + } } - render_target = device.GetLogical().CreateImageView(ci); - return *render_target; } -VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, - VKMemoryManager& memory_manager_, VKScheduler& scheduler_, - VKStagingBufferPool& staging_pool_) - : TextureCache(rasterizer_, maxwell3d_, gpu_memory_, device_.IsOptimalAstcSupported()), - device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ - staging_pool_} {} - -VKTextureCache::~VKTextureCache() = default; - -Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { - return std::make_shared(device, memory_manager, scheduler, staging_pool, - gpu_addr, params); +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + // TODO: Move this to another API + scheduler->RequestOutsideRenderPassOperationContext(); + std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); + const VkBuffer src_buffer = map.handle; + const VkImage vk_image = *image; + const VkImageAspectFlags vk_aspect_mask = aspect_mask; + const bool is_initialized = std::exchange(initialized, true); + scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized, + vk_copies](vk::CommandBuffer cmdbuf) { + CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies); + }); } -void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, - const VideoCommon::CopyParams& 
copy_params) { - const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; - const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; - UNIMPLEMENTED_IF(src_3d); +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + // TODO: Move this to another API + scheduler->RequestOutsideRenderPassOperationContext(); + std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); + const VkBuffer src_buffer = map.handle; + const VkBuffer dst_buffer = *buffer; + scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { + // TODO: Barriers + cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); + }); +} - // The texture cache handles depth in OpenGL terms, we have to handle it as subresource and - // dimension respectively. - const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z; - const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0; +void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); + scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask, + vk_copies](vk::CommandBuffer cmdbuf) { + // TODO: Barriers + cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); + }); +} - const u32 extent_z = dst_3d ? copy_params.depth : 1; - const u32 num_layers = dst_3d ? 
1 : copy_params.depth; +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, + ImageId image_id_, Image& image) + : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, + image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount( + image.info.num_samples)} { + const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); + std::array swizzle{ + SwizzleSource::R, + SwizzleSource::G, + SwizzleSource::B, + SwizzleSource::A, + }; + if (!info.IsRenderTarget()) { + swizzle = info.Swizzle(); + if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) { + std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); + } + } + const VkFormat vk_format = + MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format; + const VkImageViewCreateInfo create_info{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = image.Handle(), + .viewType = VkImageViewType{}, + .format = vk_format, + .components{ + .r = ComponentSwizzle(swizzle[0]), + .g = ComponentSwizzle(swizzle[1]), + .b = ComponentSwizzle(swizzle[2]), + .a = ComponentSwizzle(swizzle[3]), + }, + .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), + }; + const auto create = [&](VideoCommon::ImageViewType view_type, std::optional num_layers) { + VkImageViewCreateInfo ci{create_info}; + ci.viewType = ImageViewType(view_type); + if (num_layers) { + ci.subresourceRange.layerCount = *num_layers; + } + vk::ImageView handle = device->GetLogical().CreateImageView(ci); + if (device->HasDebuggingToolAttached()) { + handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); + } + image_views[static_cast(view_type)] = std::move(handle); + }; + switch (info.type) { + case VideoCommon::ImageViewType::e1D: + case VideoCommon::ImageViewType::e1DArray: + create(VideoCommon::ImageViewType::e1D, 1); + 
create(VideoCommon::ImageViewType::e1DArray, std::nullopt); + render_target = Handle(VideoCommon::ImageViewType::e1DArray); + break; + case VideoCommon::ImageViewType::e2D: + case VideoCommon::ImageViewType::e2DArray: + create(VideoCommon::ImageViewType::e2D, 1); + create(VideoCommon::ImageViewType::e2DArray, std::nullopt); + render_target = Handle(VideoCommon::ImageViewType::e2DArray); + break; + case VideoCommon::ImageViewType::e3D: + create(VideoCommon::ImageViewType::e3D, std::nullopt); + render_target = Handle(VideoCommon::ImageViewType::e3D); + break; + case VideoCommon::ImageViewType::Cube: + case VideoCommon::ImageViewType::CubeArray: + create(VideoCommon::ImageViewType::Cube, 6); + create(VideoCommon::ImageViewType::CubeArray, std::nullopt); + break; + case VideoCommon::ImageViewType::Rect: + UNIMPLEMENTED(); + break; + case VideoCommon::ImageViewType::Buffer: + buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = image.Buffer(), + .format = vk_format, + .offset = 0, // TODO: Redesign buffer cache to support this + .range = image.guest_size_bytes, + }); + break; + } +} - // We can't copy inside a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) + : VideoCommon::ImageViewBase{params} {} - src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - dst_surface->Transition(dst_base_layer, num_layers, copy_params.dest_level, 1, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); +VkImageView ImageView::DepthView() { + if (depth_view) { + return *depth_view; + } + depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); + return *depth_view; 
+} - const VkImageCopy copy{ - .srcSubresource = - { - .aspectMask = src_surface->GetAspectMask(), - .mipLevel = copy_params.source_level, - .baseArrayLayer = copy_params.source_z, - .layerCount = num_layers, - }, - .srcOffset = - { - .x = static_cast(copy_params.source_x), - .y = static_cast(copy_params.source_y), - .z = 0, - }, - .dstSubresource = - { - .aspectMask = dst_surface->GetAspectMask(), - .mipLevel = copy_params.dest_level, - .baseArrayLayer = dst_base_layer, - .layerCount = num_layers, - }, - .dstOffset = - { - .x = static_cast(copy_params.dest_x), - .y = static_cast(copy_params.dest_y), - .z = static_cast(dst_offset_z), - }, - .extent = - { - .width = copy_params.width, - .height = copy_params.height, - .depth = extent_z, - }, - }; +VkImageView ImageView::StencilView() { + if (stencil_view) { + return *stencil_view; + } + stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); + return *stencil_view; +} - const VkImage src_image = src_surface->GetImageHandle(); - const VkImage dst_image = dst_surface->GetImageHandle(); - scheduler.Record([src_image, dst_image, copy](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); +vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { + return device->GetLogical().CreateImageView({ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = image_handle, + .viewType = ImageViewType(type), + .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format, + .components{ + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = MakeSubresourceRange(aspect_mask, range), }); } -void VKTextureCache::ImageBlit(View& src_view, View& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) { - // We can't 
blit inside a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - - src_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_TRANSFER_READ_BIT); - dst_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT); - - VkImageBlit blit; - blit.srcSubresource = src_view->GetImageSubresourceLayers(); - blit.srcOffsets[0].x = copy_config.src_rect.left; - blit.srcOffsets[0].y = copy_config.src_rect.top; - blit.srcOffsets[0].z = 0; - blit.srcOffsets[1].x = copy_config.src_rect.right; - blit.srcOffsets[1].y = copy_config.src_rect.bottom; - blit.srcOffsets[1].z = 1; - blit.dstSubresource = dst_view->GetImageSubresourceLayers(); - blit.dstOffsets[0].x = copy_config.dst_rect.left; - blit.dstOffsets[0].y = copy_config.dst_rect.top; - blit.dstOffsets[0].z = 0; - blit.dstOffsets[1].x = copy_config.dst_rect.right; - blit.dstOffsets[1].y = copy_config.dst_rect.bottom; - blit.dstOffsets[1].z = 1; - - const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - - scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, - is_linear](vk::CommandBuffer cmdbuf) { - cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit, - is_linear ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST); +Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& tsc) { + const auto& device = runtime.device; + const bool arbitrary_borders = runtime.device.IsExtCustomBorderColorSupported(); + const std::array color = tsc.BorderColor(); + // C++20 bit_cast + VkClearColorValue border_color; + std::memcpy(&border_color, &color, sizeof(color)); + const VkSamplerCustomBorderColorCreateInfoEXT border_ci{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, + .pNext = nullptr, + .customBorderColor = border_color, + .format = VK_FORMAT_UNDEFINED, + }; + const void* pnext = nullptr; + if (arbitrary_borders) { + pnext = &border_ci; + } + const VkSamplerReductionModeCreateInfoEXT reduction_ci{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT, + .pNext = pnext, + .reductionMode = MaxwellToVK::SamplerReduction(tsc.reduction_filter), + }; + if (runtime.device.IsExtSamplerFilterMinmaxSupported()) { + pnext = &reduction_ci; + } else if (reduction_ci.reductionMode != VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT) { + LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required"); + } + // Some games have samplers with garbage. Sanitize them here. 
+ const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); + sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = pnext, + .flags = 0, + .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), + .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), + .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), + .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), + .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), + .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), + .mipLodBias = tsc.LodBias(), + .anisotropyEnable = static_cast(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE), + .maxAnisotropy = max_anisotropy, + .compareEnable = tsc.depth_compare_enabled, + .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), + .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), + .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), + .borderColor = + arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), + .unnormalizedCoordinates = VK_FALSE, }); } -void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) { - // Currently unimplemented. PBO copies should be dropped and we should use a render pass to - // convert from color to depth and viceversa. 
- LOG_WARNING(Render_Vulkan, "Unimplemented"); +Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { + std::vector descriptions; + std::vector attachments; + RenderPassKey renderpass_key{}; + s32 num_layers = 1; + + for (size_t index = 0; index < NUM_RT; ++index) { + const ImageView* const color_buffer = color_buffers[index]; + if (!color_buffer) { + renderpass_key.color_formats[index] = PixelFormat::Invalid; + continue; + } + descriptions.push_back(AttachmentDescription(runtime.device, color_buffer)); + attachments.push_back(color_buffer->RenderTarget()); + renderpass_key.color_formats[index] = color_buffer->format; + num_layers = std::max(num_layers, color_buffer->range.extent.layers); + images[num_images] = color_buffer->ImageHandle(); + image_ranges[num_images] = MakeSubresourceRange(color_buffer); + samples = color_buffer->Samples(); + ++num_images; + } + const size_t num_colors = attachments.size(); + const VkAttachmentReference* depth_attachment = + depth_buffer ? 
&ATTACHMENT_REFERENCES[num_colors] : nullptr; + if (depth_buffer) { + descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer)); + attachments.push_back(depth_buffer->RenderTarget()); + renderpass_key.depth_format = depth_buffer->format; + num_layers = std::max(num_layers, depth_buffer->range.extent.layers); + images[num_images] = depth_buffer->ImageHandle(); + image_ranges[num_images] = MakeSubresourceRange(depth_buffer); + samples = depth_buffer->Samples(); + ++num_images; + } else { + renderpass_key.depth_format = PixelFormat::Invalid; + } + renderpass_key.samples = samples; + + const auto& device = runtime.device.GetLogical(); + const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); + if (is_new) { + const VkSubpassDescription subpass{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = static_cast(num_colors), + .pColorAttachments = num_colors != 0 ? 
ATTACHMENT_REFERENCES.data() : nullptr, + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = depth_attachment, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; + cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast(descriptions.size()), + .pAttachments = descriptions.data(), + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr, + }); + } + renderpass = *cache_pair->second; + render_area = VkExtent2D{ + .width = key.size.width, + .height = key.size.height, + }; + num_color_buffers = static_cast(num_colors); + framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .renderPass = renderpass, + .attachmentCount = static_cast(attachments.size()), + .pAttachments = attachments.data(), + .width = key.size.width, + .height = key.size.height, + .layers = static_cast(num_layers), + }); + if (runtime.device.HasDebuggingToolAttached()) { + framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index b0be4cb0f..edc3d80c0 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -4,217 +4,265 @@ #pragma once -#include -#include +#include +#include -#include "common/common_types.h" -#include "video_core/renderer_vulkan/vk_image.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/texture_cache.h" -namespace VideoCore { -class RasterizerInterface; -} - namespace Vulkan { 
-class RasterizerVulkan; +using VideoCommon::ImageId; +using VideoCommon::NUM_RT; +using VideoCommon::Offset2D; +using VideoCommon::RenderTargets; +using VideoCore::Surface::PixelFormat; + class VKDevice; class VKScheduler; class VKStagingBufferPool; -class CachedSurfaceView; -class CachedSurface; +class BlitImageHelper; +class Image; +class ImageView; +class Framebuffer; -using Surface = std::shared_ptr; -using View = std::shared_ptr; -using TextureCacheBase = VideoCommon::TextureCache; +struct RenderPassKey { + constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; -using VideoCommon::SurfaceParams; -using VideoCommon::ViewParams; + std::array color_formats; + PixelFormat depth_format; + VkSampleCountFlagBits samples; +}; -class CachedSurface final : public VideoCommon::SurfaceBase { - friend CachedSurfaceView; +} // namespace Vulkan -public: - explicit CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, - VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, - GPUVAddr gpu_addr_, const SurfaceParams& params_); - ~CachedSurface(); +namespace std { +template <> +struct hash { + [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { + size_t value = static_cast(key.depth_format) << 48; + value ^= static_cast(key.samples) << 52; + for (size_t i = 0; i < key.color_formats.size(); ++i) { + value ^= static_cast(key.color_formats[i]) << (i * 6); + } + return value; + } +}; +} // namespace std - void UploadTexture(const std::vector& staging_buffer) override; - void DownloadTexture(std::vector& staging_buffer) override; +namespace Vulkan { - void FullTransition(VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, - VkImageLayout new_layout) { - image->Transition(0, static_cast(params.GetNumLayers()), 0, params.num_levels, - new_stage_mask, new_access, new_layout); +struct ImageBufferMap { + [[nodiscard]] VkBuffer Handle() const noexcept { + return handle; } - void 
Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, - VkImageLayout new_layout) { - image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, - new_access, new_layout); + [[nodiscard]] std::span Span() const noexcept { + return map.Span(); } - VKImage& GetImage() { - return *image; - } + VkBuffer handle; + MemoryMap map; +}; - const VKImage& GetImage() const { - return *image; - } +struct TextureCacheRuntime { + const VKDevice& device; + VKScheduler& scheduler; + VKMemoryManager& memory_manager; + VKStagingBufferPool& staging_buffer_pool; + BlitImageHelper& blit_image_helper; + std::unordered_map renderpass_cache; + + void Finish(); - VkImage GetImageHandle() const { - return *image->GetHandle(); + [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); + + [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size) { + // TODO: Have a special function for this + return MapUploadBuffer(size); } - VkImageAspectFlags GetAspectMask() const { - return image->GetAspectMask(); + void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation); + + void CopyImage(Image& dst, Image& src, std::span copies); + + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); + + [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { + return false; } - VkBufferView GetBufferViewHandle() const { - return *buffer_view; + void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t, + std::span) { + UNREACHABLE(); } -protected: - void DecorateSurfaceName() override; + void InsertUploadMemoryBarrier() {} +}; - View CreateView(const ViewParams& view_params) override; +class Image : public VideoCommon::ImageBase { +public: + explicit Image(TextureCacheRuntime&, const 
VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, + VAddr cpu_addr); -private: - void UploadBuffer(const std::vector& staging_buffer); + void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies); - void UploadImage(const std::vector& staging_buffer); + void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies); - VkBufferImageCopy GetBufferImageCopy(u32 level) const; + void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies); - VkImageSubresourceRange GetImageSubresourceRange() const; + [[nodiscard]] VkImage Handle() const noexcept { + return *image; + } - const VKDevice& device; - VKMemoryManager& memory_manager; - VKScheduler& scheduler; - VKStagingBufferPool& staging_pool; + [[nodiscard]] VkBuffer Buffer() const noexcept { + return *buffer; + } + + [[nodiscard]] VkImageCreateFlags AspectMask() const noexcept { + return aspect_mask; + } - std::optional image; +private: + VKScheduler* scheduler; + vk::Image image; vk::Buffer buffer; - vk::BufferView buffer_view; VKMemoryCommit commit; - - VkFormat format = VK_FORMAT_UNDEFINED; + VkImageAspectFlags aspect_mask = 0; + bool initialized = false; }; -class CachedSurfaceView final : public VideoCommon::ViewBase { +class ImageView : public VideoCommon::ImageViewBase { public: - explicit CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_, - const ViewParams& view_params_); - ~CachedSurfaceView(); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); - VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); + [[nodiscard]] VkImageView DepthView(); - VkImageView GetAttachment(); + [[nodiscard]] VkImageView StencilView(); - bool IsSameSurface(const CachedSurfaceView& 
rhs) const { - return &surface == &rhs.surface; + [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { + return *image_views[static_cast(query_type)]; } - u32 GetWidth() const { - return surface_params.GetMipWidth(base_level); + [[nodiscard]] VkBufferView BufferView() const noexcept { + return *buffer_view; } - u32 GetHeight() const { - return surface_params.GetMipHeight(base_level); + [[nodiscard]] VkImage ImageHandle() const noexcept { + return image_handle; } - u32 GetNumLayers() const { - return num_layers; + [[nodiscard]] VkImageView RenderTarget() const noexcept { + return render_target; } - bool IsBufferView() const { - return buffer_view; + [[nodiscard]] PixelFormat ImageFormat() const noexcept { + return image_format; } - VkImage GetImage() const { - return image; + [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { + return samples; } - VkBufferView GetBufferView() const { - return buffer_view; - } +private: + [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); - VkImageSubresourceRange GetImageSubresourceRange() const { - return {aspect_mask, base_level, num_levels, base_layer, num_layers}; - } + const VKDevice* device = nullptr; + std::array image_views; + vk::ImageView depth_view; + vk::ImageView stencil_view; + vk::BufferView buffer_view; + VkImage image_handle = VK_NULL_HANDLE; + VkImageView render_target = VK_NULL_HANDLE; + PixelFormat image_format = PixelFormat::Invalid; + VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; +}; - VkImageSubresourceLayers GetImageSubresourceLayers() const { - return {surface.GetAspectMask(), base_level, base_layer, num_layers}; - } +class ImageAlloc : public VideoCommon::ImageAllocBase {}; - void Transition(VkImageLayout new_layout, VkPipelineStageFlags new_stage_mask, - VkAccessFlags new_access) const { - surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, - new_access, new_layout); - } +class Sampler { 
+public: + explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); - void MarkAsModified(u64 tick) { - surface.MarkAsModified(true, tick); + [[nodiscard]] VkSampler Handle() const noexcept { + return *sampler; } private: - // Store a copy of these values to avoid double dereference when reading them - const SurfaceParams surface_params; - const VkImage image; - const VkBufferView buffer_view; - const VkImageAspectFlags aspect_mask; - - const VKDevice& device; - CachedSurface& surface; - const u32 base_level; - const u32 num_levels; - const VkImageViewType image_view_type; - u32 base_layer = 0; - u32 num_layers = 0; - u32 base_slice = 0; - u32 num_slices = 0; - - VkImageView last_image_view = nullptr; - u32 last_swizzle = 0; - - vk::ImageView render_target; - std::unordered_map view_cache; + vk::Sampler sampler; }; -class VKTextureCache final : public TextureCacheBase { +class Framebuffer { public: - explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, - VKMemoryManager& memory_manager_, VKScheduler& scheduler_, - VKStagingBufferPool& staging_pool_); - ~VKTextureCache(); + explicit Framebuffer(TextureCacheRuntime&, std::span color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key); -private: - Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; + [[nodiscard]] VkFramebuffer Handle() const noexcept { + return *framebuffer; + } - void ImageCopy(Surface& src_surface, Surface& dst_surface, - const VideoCommon::CopyParams& copy_params) override; + [[nodiscard]] VkRenderPass RenderPass() const noexcept { + return renderpass; + } - void ImageBlit(View& src_view, View& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) override; + [[nodiscard]] VkExtent2D RenderArea() const noexcept { + return render_area; + } - void BufferCopy(Surface& src_surface, Surface& dst_surface) 
override; + [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { + return samples; + } - const VKDevice& device; - VKMemoryManager& memory_manager; - VKScheduler& scheduler; - VKStagingBufferPool& staging_pool; + [[nodiscard]] u32 NumColorBuffers() const noexcept { + return num_color_buffers; + } + + [[nodiscard]] u32 NumImages() const noexcept { + return num_images; + } + + [[nodiscard]] const std::array& Images() const noexcept { + return images; + } + + [[nodiscard]] const std::array& ImageRanges() const noexcept { + return image_ranges; + } + +private: + vk::Framebuffer framebuffer; + VkRenderPass renderpass{}; + VkExtent2D render_area{}; + VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + u32 num_color_buffers = 0; + u32 num_images = 0; + std::array images{}; + std::array image_ranges{}; +}; + +struct TextureCacheParams { + static constexpr bool ENABLE_VALIDATION = true; + static constexpr bool FRAMEBUFFER_BLITS = false; + static constexpr bool HAS_EMULATED_COPIES = false; + + using Runtime = Vulkan::TextureCacheRuntime; + using Image = Vulkan::Image; + using ImageAlloc = Vulkan::ImageAlloc; + using ImageView = Vulkan::ImageView; + using Sampler = Vulkan::Sampler; + using Framebuffer = Vulkan::Framebuffer; }; +using TextureCache = VideoCommon::TextureCache; + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index f7e3c9821..f098a8540 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -40,30 +40,34 @@ public: void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); - void AddSampledImage(VkSampler sampler, VkImageView image_view) { - payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); + void AddSampledImage(VkImageView image_view, VkSampler sampler) { + payload.emplace_back(VkDescriptorImageInfo{ + .sampler = sampler, + .imageView = 
image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }); } void AddImage(VkImageView image_view) { - payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); + payload.emplace_back(VkDescriptorImageInfo{ + .sampler = VK_NULL_HANDLE, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }); } - void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { - payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); + void AddBuffer(VkBuffer buffer, u64 offset, size_t size) { + payload.emplace_back(VkDescriptorBufferInfo{ + .buffer = buffer, + .offset = offset, + .range = size, + }); } void AddTexelBuffer(VkBufferView texel_buffer) { payload.emplace_back(texel_buffer); } - VkImageLayout* LastImageLayout() { - return &payload.back().image.imageLayout; - } - - const VkImageLayout* LastImageLayout() const { - return &payload.back().image.imageLayout; - } - private: const VKDevice& device; VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 1eced809e..2a21e850d 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -81,6 +81,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdBeginQuery); X(vkCmdBeginRenderPass); X(vkCmdBeginTransformFeedbackEXT); + X(vkCmdBeginDebugUtilsLabelEXT); X(vkCmdBindDescriptorSets); X(vkCmdBindIndexBuffer); X(vkCmdBindPipeline); @@ -98,6 +99,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdEndQuery); X(vkCmdEndRenderPass); X(vkCmdEndTransformFeedbackEXT); + X(vkCmdEndDebugUtilsLabelEXT); X(vkCmdFillBuffer); X(vkCmdPipelineBarrier); X(vkCmdPushConstants); @@ -121,6 +123,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); X(vkCmdSetStencilTestEnableEXT); + X(vkCmdResolveImage); X(vkCreateBuffer); X(vkCreateBufferView); X(vkCreateCommandPool); @@ -176,6 +179,8 @@ void 
Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkQueueSubmit); X(vkResetFences); X(vkResetQueryPoolEXT); + X(vkSetDebugUtilsObjectNameEXT); + X(vkSetDebugUtilsObjectTagEXT); X(vkUnmapMemory); X(vkUpdateDescriptorSetWithTemplateKHR); X(vkUpdateDescriptorSets); @@ -184,6 +189,19 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { #undef X } +template <typename T> +void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjectType type, + const char* name) { + const VkDebugUtilsObjectNameInfoEXT name_info{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, + .pNext = nullptr, + .objectType = type, + .objectHandle = reinterpret_cast<u64>(handle), + .pObjectName = name, + }; + Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info)); +} + } // Anonymous namespace bool Load(InstanceDispatch& dld) noexcept { @@ -476,8 +494,7 @@ DebugCallback Instance::TryCreateDebugCallback( VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT, .pfnUserCallback = callback, .pUserData = nullptr, }; @@ -493,10 +510,38 @@ void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); } +void Buffer::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name); +} + +void BufferView::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name); +} + void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { Check(dld->vkBindImageMemory(owner, handle, memory, offset)); } +void Image::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name); +} + +void 
ImageView::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name); +} + +void DeviceMemory::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name); +} + +void Fence::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FENCE, name); +} + +void Framebuffer::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FRAMEBUFFER, name); +} + DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const { const std::size_t num = ai.descriptorSetCount; std::unique_ptr sets = std::make_unique(num); @@ -510,6 +555,10 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c } } +void DescriptorPool::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DESCRIPTOR_POOL, name); +} + CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { const VkCommandBufferAllocateInfo ai{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, @@ -530,6 +579,10 @@ CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLev } } +void CommandPool::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_COMMAND_POOL, name); +} + std::vector SwapchainKHR::GetImages() const { u32 num; Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); @@ -538,6 +591,18 @@ std::vector SwapchainKHR::GetImages() const { return images; } +void Event::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_EVENT, name); +} + +void ShaderModule::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name); +} + +void Semaphore::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, 
name); +} + Device Device::Create(VkPhysicalDevice physical_device, Span queues_ci, Span enabled_extensions, const void* next, DeviceDispatch& dispatch) noexcept { diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 76f790eab..f9a184e00 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,10 @@ #include "common/common_types.h" +#ifdef _MSC_VER +#pragma warning(disable : 26812) // Disable prefer enum class over enum +#endif + namespace Vulkan::vk { /** @@ -41,6 +46,9 @@ public: /// Construct an empty span. constexpr Span() noexcept = default; + /// Construct an empty span + constexpr Span(std::nullptr_t) noexcept {} + /// Construct a span from a single element. constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {} @@ -177,6 +185,7 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkCmdBeginQuery vkCmdBeginQuery; PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; + PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT; PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; PFN_vkCmdBindPipeline vkCmdBindPipeline; @@ -194,6 +203,7 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkCmdEndQuery vkCmdEndQuery; PFN_vkCmdEndRenderPass vkCmdEndRenderPass; PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT; + PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT; PFN_vkCmdFillBuffer vkCmdFillBuffer; PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; PFN_vkCmdPushConstants vkCmdPushConstants; @@ -217,6 +227,7 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; + 
PFN_vkCmdResolveImage vkCmdResolveImage; PFN_vkCreateBuffer vkCreateBuffer; PFN_vkCreateBufferView vkCreateBufferView; PFN_vkCreateCommandPool vkCreateCommandPool; @@ -272,6 +283,8 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkQueueSubmit vkQueueSubmit; PFN_vkResetFences vkResetFences; PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; + PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT; + PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT; PFN_vkUnmapMemory vkUnmapMemory; PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; @@ -542,18 +555,14 @@ private: const DeviceDispatch* dld = nullptr; }; -using BufferView = Handle; using DebugCallback = Handle; using DescriptorSetLayout = Handle; using DescriptorUpdateTemplateKHR = Handle; -using Framebuffer = Handle; -using ImageView = Handle; using Pipeline = Handle; using PipelineLayout = Handle; using QueryPool = Handle; using RenderPass = Handle; using Sampler = Handle; -using ShaderModule = Handle; using SurfaceKHR = Handle; using DescriptorSets = PoolAllocations; @@ -605,6 +614,17 @@ class Buffer : public Handle { public: /// Attaches a memory allocation. void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + +class BufferView : public Handle { + using Handle::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; }; class Image : public Handle { @@ -613,12 +633,26 @@ class Image : public Handle { public: /// Attaches a memory allocation. void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + +class ImageView : public Handle { + using Handle::Handle; + +public: + /// Set object name. 
+ void SetObjectNameEXT(const char* name) const; }; class DeviceMemory : public Handle { using Handle::Handle; public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; + u8* Map(VkDeviceSize offset, VkDeviceSize size) const { void* data; Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); @@ -634,6 +668,9 @@ class Fence : public Handle { using Handle::Handle; public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; + VkResult Wait(u64 timeout = std::numeric_limits::max()) const noexcept { return dld->vkWaitForFences(owner, 1, &handle, true, timeout); } @@ -647,11 +684,22 @@ public: } }; +class Framebuffer : public Handle { + using Handle::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + class DescriptorPool : public Handle { using Handle::Handle; public: DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; }; class CommandPool : public Handle { @@ -660,6 +708,9 @@ class CommandPool : public Handle { public: CommandBuffers Allocate(std::size_t num_buffers, VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; }; class SwapchainKHR : public Handle { @@ -673,15 +724,29 @@ class Event : public Handle { using Handle::Handle; public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; + VkResult GetStatus() const noexcept { return dld->vkGetEventStatus(owner, handle); } }; +class ShaderModule : public Handle { + using Handle::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + class Semaphore : public Handle { using Handle::Handle; public: + /// Set object name. 
+ void SetObjectNameEXT(const char* name) const; + [[nodiscard]] u64 GetCounter() const { u64 value; Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); @@ -932,6 +997,12 @@ public: regions.data(), filter); } + void ResolveImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, + VkImageLayout dst_layout, Span regions) { + dld->vkCmdResolveImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), + regions.data()); + } + void Dispatch(u32 x, u32 y, u32 z) const noexcept { dld->vkCmdDispatch(handle, x, y, z); } @@ -946,6 +1017,23 @@ public: image_barriers.size(), image_barriers.data()); } + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags = 0) const noexcept { + PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, {}); + } + + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, + const VkBufferMemoryBarrier& buffer_barrier) const noexcept { + PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {}); + } + + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, + const VkImageMemoryBarrier& image_barrier) const noexcept { + PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, image_barrier); + } + void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout, Span regions) const noexcept { dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), @@ -979,6 +1067,13 @@ public: dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); } + template + void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags, + const T& data) const noexcept { + static_assert(std::is_trivially_copyable_v, " is not trivially copyable"); + 
dld->vkCmdPushConstants(handle, layout, flags, 0, static_cast(sizeof(T)), &data); + } + void SetViewport(u32 first, Span viewports) const noexcept { dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); } @@ -1088,6 +1183,20 @@ public: counter_buffers, counter_buffer_offsets); } + void BeginDebugUtilsLabelEXT(const char* label, std::span color) const noexcept { + const VkDebugUtilsLabelEXT label_info{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = label, + .color{color[0], color[1], color[2], color[3]}, + }; + dld->vkCmdBeginDebugUtilsLabelEXT(handle, &label_info); + } + + void EndDebugUtilsLabelEXT() const noexcept { + dld->vkCmdEndDebugUtilsLabelEXT(handle); + } + private: VkCommandBuffer handle; const DeviceDispatch* dld; diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp deleted file mode 100644 index 53c7ef12d..000000000 --- a/src/video_core/sampler_cache.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/cityhash.h" -#include "common/common_types.h" -#include "video_core/sampler_cache.h" - -namespace VideoCommon { - -std::size_t SamplerCacheKey::Hash() const { - static_assert(sizeof(raw) % sizeof(u64) == 0); - return static_cast( - Common::CityHash64(reinterpret_cast(raw.data()), sizeof(raw) / sizeof(u64))); -} - -bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const { - return raw == rhs.raw; -} - -} // namespace VideoCommon diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h deleted file mode 100644 index cbe3ad071..000000000 --- a/src/video_core/sampler_cache.h +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include - -#include "video_core/textures/texture.h" - -namespace VideoCommon { - -struct SamplerCacheKey final : public Tegra::Texture::TSCEntry { - std::size_t Hash() const; - - bool operator==(const SamplerCacheKey& rhs) const; - - bool operator!=(const SamplerCacheKey& rhs) const { - return !operator==(rhs); - } -}; - -} // namespace VideoCommon - -namespace std { - -template <> -struct hash { - std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace VideoCommon { - -template -class SamplerCache { -public: - SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) { - const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc}); - auto& sampler = entry->second; - if (is_cache_miss) { - sampler = CreateSampler(tsc); - } - return ToSamplerType(sampler); - } - -protected: - virtual SamplerStorageType CreateSampler(const Tegra::Texture::TSCEntry& tsc) const = 0; - - virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0; - -private: - std::unordered_map cache; -}; - -} // namespace VideoCommon \ No newline at end of file diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index 78245473c..09f93463b 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -137,10 +137,9 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, Vulkan::VKDescriptorPool& descriptor_pool, Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - Vulkan::VKRenderPassCache& renderpass_cache, std::vector bindings, Vulkan::SPIRVProgram program, - Vulkan::GraphicsPipelineCacheKey key) { + Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { std::unique_lock lock(queue_mutex); pending_queue.push({ .backend = Backend::Vulkan, @@ -149,10 +148,10 @@ void 
AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, .scheduler = &scheduler, .descriptor_pool = &descriptor_pool, .update_descriptor_queue = &update_descriptor_queue, - .renderpass_cache = &renderpass_cache, .bindings = std::move(bindings), .program = std::move(program), .key = key, + .num_color_buffers = num_color_buffers, }); cv.notify_one(); } @@ -205,8 +204,8 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context } else if (work.backend == Backend::Vulkan) { auto pipeline = std::make_unique( *work.vk_device, *work.scheduler, *work.descriptor_pool, - *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings, - work.program); + *work.update_descriptor_queue, work.key, work.bindings, work.program, + work.num_color_buffers); work.pp_cache->EmplacePipeline(std::move(pipeline)); } diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 5a7216019..004e214a8 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h @@ -98,9 +98,9 @@ public: Vulkan::VKScheduler& scheduler, Vulkan::VKDescriptorPool& descriptor_pool, Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - Vulkan::VKRenderPassCache& renderpass_cache, std::vector bindings, - Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key); + Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, + u32 num_color_buffers); private: void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); @@ -127,10 +127,10 @@ private: Vulkan::VKScheduler* scheduler; Vulkan::VKDescriptorPool* descriptor_pool; Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; - Vulkan::VKRenderPassCache* renderpass_cache; std::vector bindings; Vulkan::SPIRVProgram program; Vulkan::GraphicsPipelineCacheKey key; + u32 num_color_buffers; }; std::condition_variable cv; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 
ab14c1aa3..6576d1208 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -25,7 +25,7 @@ using Tegra::Shader::OpCode; namespace { void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, - const std::list& used_samplers) { + const std::list& used_samplers) { if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { return; } @@ -43,9 +43,9 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, } } -std::optional TryDeduceSamplerSize(const Sampler& sampler_to_deduce, +std::optional TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, VideoCore::GuestDriverProfile& gpu_driver, - const std::list& used_samplers) { + const std::list& used_samplers) { const u32 base_offset = sampler_to_deduce.offset; u32 max_offset{std::numeric_limits::max()}; for (const auto& sampler : used_samplers) { diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 532f66d27..5470e8cf4 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -497,11 +497,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { return pc; } -Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { +ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { const auto offset = static_cast(image.index.Value()); - const auto it = std::find_if(std::begin(used_images), std::end(used_images), - [offset](const Image& entry) { return entry.offset == offset; }); + const auto it = + std::find_if(std::begin(used_images), std::end(used_images), + [offset](const ImageEntry& entry) { return entry.offset == offset; }); if (it != std::end(used_images)) { ASSERT(!it->is_bindless && it->type == type); return *it; @@ -511,7 +512,7 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t return used_images.emplace_back(next_index, offset, type); } -Image& 
ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { +ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { const Node image_register = GetRegister(reg); const auto result = TrackCbuf(image_register, global_code, static_cast(global_code.size())); @@ -520,7 +521,7 @@ Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::Im const auto offset = std::get<2>(result); const auto it = std::find_if(std::begin(used_images), std::end(used_images), - [buffer, offset](const Image& entry) { + [buffer, offset](const ImageEntry& entry) { return entry.buffer == buffer && entry.offset == offset; }); if (it != std::end(used_images)) { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index fb18f631f..833fa2a39 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { SamplerInfo info; info.is_shadow = is_depth_compare; - const std::optional sampler = GetSampler(instr.sampler, info); + const std::optional sampler = GetSampler(instr.sampler, info); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -173,9 +173,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { SamplerInfo info; info.type = texture_type; info.is_array = is_array; - const std::optional sampler = is_bindless - ? GetBindlessSampler(base_reg, info, index_var) - : GetSampler(instr.sampler, info); + const std::optional sampler = + is_bindless ? GetBindlessSampler(base_reg, info, index_var) + : GetSampler(instr.sampler, info); Node4 values; if (!sampler) { std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); @@ -217,9 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { [[fallthrough]]; case OpCode::Id::TXQ: { Node index_var; - const std::optional sampler = is_bindless - ? 
GetBindlessSampler(instr.gpr8, {}, index_var) - : GetSampler(instr.sampler, {}); + const std::optional sampler = + is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) + : GetSampler(instr.sampler, {}); if (!sampler) { u32 indexer = 0; @@ -272,7 +272,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { info.type = texture_type; info.is_array = is_array; Node index_var; - const std::optional sampler = + const std::optional sampler = is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) : GetSampler(instr.sampler, info); @@ -379,14 +379,15 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( return info; } -std::optional ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, - SamplerInfo sampler_info) { +std::optional ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, + SamplerInfo sampler_info) { const u32 offset = static_cast(sampler.index.Value()); const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); // If this sampler has already been used, return the existing mapping. 
- const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [offset](const Sampler& entry) { return entry.offset == offset; }); + const auto it = + std::find_if(used_samplers.begin(), used_samplers.end(), + [offset](const SamplerEntry& entry) { return entry.offset == offset; }); if (it != used_samplers.end()) { ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); @@ -399,8 +400,8 @@ std::optional ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, *info.is_shadow, *info.is_buffer, false); } -std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, - Node& index_var) { +std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, + SamplerInfo info, Node& index_var) { const Node sampler_register = GetRegister(reg); const auto [base_node, tracked_sampler_info] = TrackBindlessSampler(sampler_register, global_code, static_cast(global_code.size())); @@ -416,7 +417,7 @@ std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, // If this sampler has already been used, return the existing mapping. 
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer, offset](const Sampler& entry) { + [buffer, offset](const SamplerEntry& entry) { return entry.buffer == buffer && entry.offset == offset; }); if (it != used_samplers.end()) { @@ -436,11 +437,12 @@ std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); // Try to use an already created sampler if it exists - const auto it = std::find_if( - used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) { - return offsets == std::pair{entry.offset, entry.secondary_offset} && - indices == std::pair{entry.buffer, entry.secondary_buffer}; - }); + const auto it = + std::find_if(used_samplers.begin(), used_samplers.end(), + [indices, offsets](const SamplerEntry& entry) { + return offsets == std::pair{entry.offset, entry.secondary_offset} && + indices == std::pair{entry.buffer, entry.secondary_buffer}; + }); if (it != used_samplers.end()) { ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); @@ -460,7 +462,7 @@ std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, // If this sampler has already been used, return the existing mapping. const auto it = std::find_if( used_samplers.begin(), used_samplers.end(), - [base_offset](const Sampler& entry) { return entry.offset == base_offset; }); + [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); if (it != used_samplers.end()) { ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && @@ -565,9 +567,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, info.is_buffer = false; Node index_var; - const std::optional sampler = is_bindless - ? 
GetBindlessSampler(*bindless_reg, info, index_var) - : GetSampler(instr.sampler, info); + const std::optional sampler = + is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) + : GetSampler(instr.sampler, info); if (!sampler) { return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; } @@ -724,7 +726,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de info.is_shadow = depth_compare; Node index_var; - const std::optional sampler = + const std::optional sampler = is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) : GetSampler(instr.sampler, info); Node4 values; @@ -783,7 +785,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - const std::optional sampler = GetSampler(instr.sampler, {}); + const std::optional sampler = GetSampler(instr.sampler, {}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -800,7 +802,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is info.type = texture_type; info.is_array = is_array; info.is_shadow = false; - const std::optional sampler = GetSampler(instr.sampler, info); + const std::optional sampler = GetSampler(instr.sampler, info); const std::size_t type_coord_count = GetCoordCount(texture_type); const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 8db9e1de7..b54d33763 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -282,25 +282,24 @@ struct SeparateSamplerNode; using TrackSamplerData = std::variant; using TrackSampler = std::shared_ptr; -struct Sampler { +struct SamplerEntry { /// Bound samplers constructor - constexpr explicit Sampler(u32 index_, u32 offset_, 
Tegra::Shader::TextureType type_, - bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) + explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, + bool is_shadow_, bool is_buffer_, bool is_indexed_) : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_indexed{is_indexed_} {} /// Separate sampler constructor - constexpr explicit Sampler(u32 index_, std::pair offsets_, - std::pair buffers_, Tegra::Shader::TextureType type_, - bool is_array_, bool is_shadow_, bool is_buffer_) - : index{index_}, offset{offsets_.first}, secondary_offset{offsets_.second}, - buffer{buffers_.first}, secondary_buffer{buffers_.second}, type{type_}, - is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} + explicit SamplerEntry(u32 index_, std::pair offsets, std::pair buffers, + Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, + bool is_buffer_) + : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, + buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, + is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} /// Bindless samplers constructor - constexpr explicit Sampler(u32 index_, u32 offset_, u32 buffer_, - Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, - bool is_buffer_, bool is_indexed_) + explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, + bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { } @@ -340,14 +339,14 @@ struct BindlessSamplerNode { u32 offset; }; -struct Image { +struct ImageEntry { public: /// Bound images constructor - constexpr explicit Image(u32 index_, u32 offset_, 
Tegra::Shader::ImageType type_) + explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) : index{index_}, offset{offset_}, type{type_} {} /// Bindless samplers constructor - constexpr explicit Image(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) + explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} void MarkWrite() { @@ -391,7 +390,7 @@ struct MetaArithmetic { /// Parameters describing a texture sampler struct MetaTexture { - Sampler sampler; + SamplerEntry sampler; Node array; Node depth_compare; std::vector aoffi; @@ -405,7 +404,7 @@ struct MetaTexture { }; struct MetaImage { - const Image& image; + const ImageEntry& image; std::vector values; u32 element{}; }; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 6aae14e34..0c6ab0f07 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -94,11 +94,11 @@ public: return used_cbufs; } - const std::list& GetSamplers() const { + const std::list& GetSamplers() const { return used_samplers; } - const std::list& GetImages() const { + const std::list& GetImages() const { return used_images; } @@ -334,17 +334,17 @@ private: std::optional sampler); /// Accesses a texture sampler. - std::optional GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); + std::optional GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); /// Accesses a texture sampler for a bindless texture. - std::optional GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, - Node& index_var); + std::optional GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, + Node& index_var); /// Accesses an image. 
- Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); + ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); /// Access a bindless image sampler. - Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); + ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -454,8 +454,8 @@ private: std::set used_input_attributes; std::set used_output_attributes; std::map used_cbufs; - std::list used_samplers; - std::list used_images; + std::list used_samplers; + std::list used_images; std::array used_clip_distances{}; std::map used_global_memory; bool uses_layer{}; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 937e29d1e..6308aef94 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -280,7 +280,7 @@ bool IsPixelFormatSRGB(PixelFormat format) { } std::pair GetASTCBlockSize(PixelFormat format) { - return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; + return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; } } // namespace VideoCore::Surface diff --git a/src/video_core/surface.h b/src/video_core/surface.h index cfd12fa61..c40ab89d0 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -120,7 +120,7 @@ enum class PixelFormat { Max = MaxDepthStencilFormat, Invalid = 255, }; -static constexpr std::size_t MaxPixelFormat = static_cast(PixelFormat::Max); +constexpr std::size_t MaxPixelFormat = static_cast(PixelFormat::Max); enum class SurfaceType { ColorTexture = 0, @@ -140,117 +140,7 @@ enum class SurfaceTarget { TextureCubeArray, }; -constexpr std::array compression_factor_shift_table = {{ - 0, // A8B8G8R8_UNORM - 0, // A8B8G8R8_SNORM - 0, // A8B8G8R8_SINT - 0, // A8B8G8R8_UINT - 0, // R5G6B5_UNORM - 0, // B5G6R5_UNORM - 0, // A1R5G5B5_UNORM - 0, // A2B10G10R10_UNORM - 0, // 
A2B10G10R10_UINT - 0, // A1B5G5R5_UNORM - 0, // R8_UNORM - 0, // R8_SNORM - 0, // R8_SINT - 0, // R8_UINT - 0, // R16G16B16A16_FLOAT - 0, // R16G16B16A16_UNORM - 0, // R16G16B16A16_SNORM - 0, // R16G16B16A16_SINT - 0, // R16G16B16A16_UINT - 0, // B10G11R11_FLOAT - 0, // R32G32B32A32_UINT - 2, // BC1_RGBA_UNORM - 2, // BC2_UNORM - 2, // BC3_UNORM - 2, // BC4_UNORM - 2, // BC4_SNORM - 2, // BC5_UNORM - 2, // BC5_SNORM - 2, // BC7_UNORM - 2, // BC6H_UFLOAT - 2, // BC6H_SFLOAT - 2, // ASTC_2D_4X4_UNORM - 0, // B8G8R8A8_UNORM - 0, // R32G32B32A32_FLOAT - 0, // R32G32B32A32_SINT - 0, // R32G32_FLOAT - 0, // R32G32_SINT - 0, // R32_FLOAT - 0, // R16_FLOAT - 0, // R16_UNORM - 0, // R16_SNORM - 0, // R16_UINT - 0, // R16_SINT - 0, // R16G16_UNORM - 0, // R16G16_FLOAT - 0, // R16G16_UINT - 0, // R16G16_SINT - 0, // R16G16_SNORM - 0, // R32G32B32_FLOAT - 0, // A8B8G8R8_SRGB - 0, // R8G8_UNORM - 0, // R8G8_SNORM - 0, // R8G8_SINT - 0, // R8G8_UINT - 0, // R32G32_UINT - 0, // R16G16B16X16_FLOAT - 0, // R32_UINT - 0, // R32_SINT - 2, // ASTC_2D_8X8_UNORM - 2, // ASTC_2D_8X5_UNORM - 2, // ASTC_2D_5X4_UNORM - 0, // B8G8R8A8_SRGB - 2, // BC1_RGBA_SRGB - 2, // BC2_SRGB - 2, // BC3_SRGB - 2, // BC7_SRGB - 0, // A4B4G4R4_UNORM - 2, // ASTC_2D_4X4_SRGB - 2, // ASTC_2D_8X8_SRGB - 2, // ASTC_2D_8X5_SRGB - 2, // ASTC_2D_5X4_SRGB - 2, // ASTC_2D_5X5_UNORM - 2, // ASTC_2D_5X5_SRGB - 2, // ASTC_2D_10X8_UNORM - 2, // ASTC_2D_10X8_SRGB - 2, // ASTC_2D_6X6_UNORM - 2, // ASTC_2D_6X6_SRGB - 2, // ASTC_2D_10X10_UNORM - 2, // ASTC_2D_10X10_SRGB - 2, // ASTC_2D_12X12_UNORM - 2, // ASTC_2D_12X12_SRGB - 2, // ASTC_2D_8X6_UNORM - 2, // ASTC_2D_8X6_SRGB - 2, // ASTC_2D_6X5_UNORM - 2, // ASTC_2D_6X5_SRGB - 0, // E5B9G9R9_FLOAT - 0, // D32_FLOAT - 0, // D16_UNORM - 0, // D24_UNORM_S8_UINT - 0, // S8_UINT_D24_UNORM - 0, // D32_FLOAT_S8_UINT -}}; - -/** - * Gets the compression factor for the specified PixelFormat. 
This applies to just the - * "compressed width" and "compressed height", not the overall compression factor of a - * compressed image. This is used for maintaining proper surface sizes for compressed - * texture formats. - */ -inline constexpr u32 GetCompressionFactorShift(PixelFormat format) { - DEBUG_ASSERT(format != PixelFormat::Invalid); - DEBUG_ASSERT(static_cast(format) < compression_factor_shift_table.size()); - return compression_factor_shift_table[static_cast(format)]; -} - -inline constexpr u32 GetCompressionFactor(PixelFormat format) { - return 1U << GetCompressionFactorShift(format); -} - -constexpr std::array block_width_table = {{ +constexpr std::array BLOCK_WIDTH_TABLE = {{ 1, // A8B8G8R8_UNORM 1, // A8B8G8R8_SNORM 1, // A8B8G8R8_SINT @@ -344,15 +234,12 @@ constexpr std::array block_width_table = {{ 1, // D32_FLOAT_S8_UINT }}; -static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { - if (format == PixelFormat::Invalid) - return 0; - - ASSERT(static_cast(format) < block_width_table.size()); - return block_width_table[static_cast(format)]; +constexpr u32 DefaultBlockWidth(PixelFormat format) { + ASSERT(static_cast(format) < BLOCK_WIDTH_TABLE.size()); + return BLOCK_WIDTH_TABLE[static_cast(format)]; } -constexpr std::array block_height_table = {{ +constexpr std::array BLOCK_HEIGHT_TABLE = {{ 1, // A8B8G8R8_UNORM 1, // A8B8G8R8_SNORM 1, // A8B8G8R8_SINT @@ -446,15 +333,12 @@ constexpr std::array block_height_table = {{ 1, // D32_FLOAT_S8_UINT }}; -static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { - if (format == PixelFormat::Invalid) - return 0; - - ASSERT(static_cast(format) < block_height_table.size()); - return block_height_table[static_cast(format)]; +constexpr u32 DefaultBlockHeight(PixelFormat format) { + ASSERT(static_cast(format) < BLOCK_HEIGHT_TABLE.size()); + return BLOCK_HEIGHT_TABLE[static_cast(format)]; } -constexpr std::array bpp_table = {{ +constexpr std::array BITS_PER_BLOCK_TABLE = {{ 32, // A8B8G8R8_UNORM 32, // 
A8B8G8R8_SNORM 32, // A8B8G8R8_SINT @@ -548,20 +432,14 @@ constexpr std::array bpp_table = {{ 64, // D32_FLOAT_S8_UINT }}; -static constexpr u32 GetFormatBpp(PixelFormat format) { - if (format == PixelFormat::Invalid) - return 0; - - ASSERT(static_cast(format) < bpp_table.size()); - return bpp_table[static_cast(format)]; +constexpr u32 BitsPerBlock(PixelFormat format) { + ASSERT(static_cast(format) < BITS_PER_BLOCK_TABLE.size()); + return BITS_PER_BLOCK_TABLE[static_cast(format)]; } /// Returns the sizer in bytes of the specified pixel format -static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { - if (pixel_format == PixelFormat::Invalid) { - return 0; - } - return GetFormatBpp(pixel_format) / CHAR_BIT; +constexpr u32 BytesPerBlock(PixelFormat pixel_format) { + return BitsPerBlock(pixel_format) / CHAR_BIT; } SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp new file mode 100644 index 000000000..a4fc1184b --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.cpp @@ -0,0 +1,70 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "common/alignment.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/accelerated_swizzle.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/decoders.h" + +namespace VideoCommon::Accelerated { + +using Tegra::Texture::GOB_SIZE_SHIFT; +using Tegra::Texture::GOB_SIZE_X; +using Tegra::Texture::GOB_SIZE_X_SHIFT; +using Tegra::Texture::GOB_SIZE_Y_SHIFT; +using VideoCore::Surface::BytesPerBlock; + +BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle, + const ImageInfo& info) { + const Extent3D block = swizzle.block; + const Extent3D num_tiles = swizzle.num_tiles; + const u32 bytes_per_block = BytesPerBlock(info.format); + const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); + const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; + const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); + return BlockLinearSwizzle2DParams{ + .origin{0, 0, 0}, + .destination{0, 0, 0}, + .bytes_per_block_log2 = static_cast(std::countr_zero(bytes_per_block)), + .layer_stride = info.layer_stride, + .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth), + .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, + .block_height = block.height, + .block_height_mask = (1U << block.height) - 1, + }; +} + +BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle, + const ImageInfo& info) { + const Extent3D block = swizzle.block; + const Extent3D num_tiles = swizzle.num_tiles; + const u32 bytes_per_block = BytesPerBlock(info.format); + const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); + const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; + + const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT; + const u32 
block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); + const u32 slice_size = + Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size; + return BlockLinearSwizzle3DParams{ + .origin{0, 0, 0}, + .destination{0, 0, 0}, + .bytes_per_block_log2 = static_cast(std::countr_zero(bytes_per_block)), + .slice_size = slice_size, + .block_size = block_size, + .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, + .block_height = block.height, + .block_height_mask = (1U << block.height) - 1, + .block_depth = block.depth, + .block_depth_mask = (1U << block.depth) - 1, + }; +} + +} // namespace VideoCommon::Accelerated \ No newline at end of file diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h new file mode 100644 index 000000000..6ec5c78c4 --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.h @@ -0,0 +1,45 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "common/common_types.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon::Accelerated { + +struct BlockLinearSwizzle2DParams { + std::array origin; + std::array destination; + u32 bytes_per_block_log2; + u32 layer_stride; + u32 block_size; + u32 x_shift; + u32 block_height; + u32 block_height_mask; +}; + +struct BlockLinearSwizzle3DParams { + std::array origin; + std::array destination; + u32 bytes_per_block_log2; + u32 slice_size; + u32 block_size; + u32 x_shift; + u32 block_height; + u32 block_height_mask; + u32 block_depth; + u32 block_depth_mask; +}; + +[[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams( + const SwizzleParameters& swizzle, const ImageInfo& info); + +[[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams( + const SwizzleParameters& swizzle, const ImageInfo& info); + +} // namespace VideoCommon::Accelerated diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h deleted file mode 100644 index 5b475fe06..000000000 --- a/src/video_core/texture_cache/copy_params.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "common/common_types.h" - -namespace VideoCommon { - -struct CopyParams { - constexpr CopyParams(u32 source_x_, u32 source_y_, u32 source_z_, u32 dest_x_, u32 dest_y_, - u32 dest_z_, u32 source_level_, u32 dest_level_, u32 width_, u32 height_, - u32 depth_) - : source_x{source_x_}, source_y{source_y_}, source_z{source_z_}, dest_x{dest_x_}, - dest_y{dest_y_}, dest_z{dest_z_}, source_level{source_level_}, - dest_level{dest_level_}, width{width_}, height{height_}, depth{depth_} {} - - constexpr CopyParams(u32 width_, u32 height_, u32 depth_, u32 level_) - : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level_}, - dest_level{level_}, width{width_}, height{height_}, depth{depth_} {} - - u32 source_x; - u32 source_y; - u32 source_z; - u32 dest_x; - u32 dest_y; - u32 dest_z; - u32 source_level; - u32 dest_level; - u32 width; - u32 height; - u32 depth; -}; - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp new file mode 100644 index 000000000..017327975 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc4.cpp @@ -0,0 +1,97 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/texture_cache/decode_bc4.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt +[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) { + const u32 code_offset = 16 + 3 * (4 * y + x); + const u32 code = (bits >> code_offset) & 7; + const u32 red0 = (bits >> 0) & 0xff; + const u32 red1 = (bits >> 8) & 0xff; + if (red0 > red1) { + switch (code) { + case 0: + return red0; + case 1: + return red1; + case 2: + return (6 * red0 + 1 * red1) / 7; + case 3: + return (5 * red0 + 2 * red1) / 7; + case 4: + return (4 * red0 + 3 * red1) / 7; + case 5: + return (3 * red0 + 4 * red1) / 7; + case 6: + return (2 * red0 + 5 * red1) / 7; + case 7: + return (1 * red0 + 6 * red1) / 7; + } + } else { + switch (code) { + case 0: + return red0; + case 1: + return red1; + case 2: + return (4 * red0 + 1 * red1) / 5; + case 3: + return (3 * red0 + 2 * red1) / 5; + case 4: + return (2 * red0 + 3 * red1) / 5; + case 5: + return (1 * red0 + 4 * red1) / 5; + case 6: + return 0; + case 7: + return 0xff; + } + } + return 0; +} + +void DecompressBC4(std::span input, Extent3D extent, std::span output) { + UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width); + UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height); + static constexpr u32 BLOCK_SIZE = 4; + size_t input_offset = 0; + for (u32 slice = 0; slice < extent.depth; ++slice) { + for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) { + for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) { + u64 bits; + std::memcpy(&bits, &input[input_offset], sizeof(bits)); + input_offset += sizeof(bits); + + for (u32 y = 0; y < BLOCK_SIZE; ++y) { + for (u32 x = 0; x < BLOCK_SIZE; ++x) { + const u32 linear_z = slice; + const u32 linear_y = 
block_y * BLOCK_SIZE + y; + const u32 linear_x = block_x * BLOCK_SIZE + x; + const u32 offset_z = linear_z * extent.width * extent.height; + const u32 offset_y = linear_y * extent.width; + const u32 offset_x = linear_x; + const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL; + const u32 color = DecompressBlock(bits, x, y); + output[output_offset + 0] = static_cast(color); + output[output_offset + 1] = 0; + output[output_offset + 2] = 0; + output[output_offset + 3] = 0xff; + } + } + } + } + } +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h new file mode 100644 index 000000000..63fb23508 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc4.h @@ -0,0 +1,16 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +void DecompressBC4(std::span data, Extent3D extent, std::span output); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h new file mode 100644 index 000000000..3a03b786f --- /dev/null +++ b/src/video_core/texture_cache/descriptor_table.h @@ -0,0 +1,82 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include + +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "common/logging/log.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" + +namespace VideoCommon { + +template +class DescriptorTable { +public: + explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {} + + [[nodiscard]] bool Synchornize(GPUVAddr gpu_addr, u32 limit) { + [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { + return false; + } + Refresh(gpu_addr, limit); + return true; + } + + void Invalidate() noexcept { + std::ranges::fill(read_descriptors, 0); + } + + [[nodiscard]] std::pair Read(u32 index) { + DEBUG_ASSERT(index <= current_limit); + const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor); + std::pair result; + gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor)); + if (IsDescriptorRead(index)) { + result.second = result.first != descriptors[index]; + } else { + MarkDescriptorAsRead(index); + result.second = true; + } + if (result.second) { + descriptors[index] = result.first; + } + return result; + } + + [[nodiscard]] u32 Limit() const noexcept { + return current_limit; + } + +private: + void Refresh(GPUVAddr gpu_addr, u32 limit) { + current_gpu_addr = gpu_addr; + current_limit = limit; + + const size_t num_descriptors = static_cast(limit) + 1; + read_descriptors.clear(); + read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0); + descriptors.resize(num_descriptors); + } + + void MarkDescriptorAsRead(u32 index) noexcept { + read_descriptors[index / 64] |= 1ULL << (index % 64); + } + + [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept { + return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0; + } + + Tegra::MemoryManager& gpu_memory; + GPUVAddr current_gpu_addr{}; + u32 current_limit{}; + std::vector read_descriptors; + std::vector descriptors; +}; + +} // namespace 
VideoCommon diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 7938d71eb..ddfb726fe 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/texture_cache/format_lookup_table.h" @@ -20,198 +19,207 @@ constexpr auto UNORM = ComponentType::UNORM; constexpr auto SINT = ComponentType::SINT; constexpr auto UINT = ComponentType::UINT; constexpr auto FLOAT = ComponentType::FLOAT; -constexpr bool C = false; // Normal color -constexpr bool S = true; // Srgb - -struct Table { - constexpr Table(TextureFormat texture_format_, bool is_srgb_, ComponentType red_component_, - ComponentType green_component_, ComponentType blue_component_, - ComponentType alpha_component_, PixelFormat pixel_format_) - : texture_format{texture_format_}, pixel_format{pixel_format_}, - red_component{red_component_}, green_component{green_component_}, - blue_component{blue_component_}, alpha_component{alpha_component_}, is_srgb{is_srgb_} {} - - TextureFormat texture_format; - PixelFormat pixel_format; - ComponentType red_component; - ComponentType green_component; - ComponentType blue_component; - ComponentType alpha_component; - bool is_srgb; -}; -constexpr std::array DefinitionTable = {{ - {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM}, - {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM}, - {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT}, - {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT}, - {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB}, - - {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, 
UNORM, PixelFormat::B5G6R5_UNORM}, - - {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM}, - {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT}, - - {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM}, - - {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM}, - - {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM}, - {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM}, - {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT}, - {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT}, - - {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM}, - {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM}, - {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT}, - {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT}, - - {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM}, - {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM}, - {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT}, - {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT}, - {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT}, - - {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT}, - {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM}, - {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM}, - {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT}, - {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT}, - - {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT}, - 
{TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM}, - {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM}, - {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT}, - {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT}, - - {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT}, - - {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT}, - {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT}, - {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT}, - - {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT}, - - {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT}, - {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT}, - {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT}, - - {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT}, - {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT}, - {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT}, - - {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT}, - - {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT}, - {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM}, - {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, - {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, - {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT}, - - {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM}, - {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB}, - - {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, 
PixelFormat::BC2_UNORM}, - {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB}, - - {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM}, - {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB}, - - {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM}, - {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM}, - - {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM}, - {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM}, - - {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM}, - {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB}, - - {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT}, - {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT}, - - {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM}, - {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB}, - - {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM}, - {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB}, - - {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM}, - {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB}, - - {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM}, - {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB}, - - {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM}, - {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB}, - - {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM}, - 
{TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB}, - - {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM}, - {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB}, - - {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM}, - {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB}, - - {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM}, - {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB}, - - {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM}, - {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB}, +constexpr bool LINEAR = false; +constexpr bool SRGB = true; + +constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component, + ComponentType blue_component, ComponentType alpha_component, bool is_srgb) { + u32 hash = is_srgb ? 
1 : 0; + hash |= static_cast(red_component) << 1; + hash |= static_cast(green_component) << 4; + hash |= static_cast(blue_component) << 7; + hash |= static_cast(alpha_component) << 10; + hash |= static_cast(format) << 13; + return hash; +} - {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM}, - {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB}, -}}; +constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb = LINEAR) { + return Hash(format, component, component, component, component, is_srgb); +} } // Anonymous namespace -FormatLookupTable::FormatLookupTable() { - table.fill(static_cast(PixelFormat::Invalid)); - - for (const auto& entry : DefinitionTable) { - table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component, - entry.green_component, entry.blue_component, entry.alpha_component)] = - static_cast(entry.pixel_format); - } -} - -PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb, - ComponentType red_component, - ComponentType green_component, - ComponentType blue_component, - ComponentType alpha_component) const noexcept { - const auto pixel_format = static_cast(table[CalculateIndex( - format, is_srgb, red_component, green_component, blue_component, alpha_component)]); - // [[likely]] - if (pixel_format != PixelFormat::Invalid) { - return pixel_format; +PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green, + ComponentType blue, ComponentType alpha, + bool is_srgb) noexcept { + switch (Hash(format, red, green, blue, alpha, is_srgb)) { + case Hash(TextureFormat::A8R8G8B8, UNORM): + return PixelFormat::A8B8G8R8_UNORM; + case Hash(TextureFormat::A8R8G8B8, SNORM): + return PixelFormat::A8B8G8R8_SNORM; + case Hash(TextureFormat::A8R8G8B8, UINT): + return PixelFormat::A8B8G8R8_UINT; + case Hash(TextureFormat::A8R8G8B8, SINT): + return PixelFormat::A8B8G8R8_SINT; + 
case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB): + return PixelFormat::A8B8G8R8_SRGB; + case Hash(TextureFormat::B5G6R5, UNORM): + return PixelFormat::B5G6R5_UNORM; + case Hash(TextureFormat::A2B10G10R10, UNORM): + return PixelFormat::A2B10G10R10_UNORM; + case Hash(TextureFormat::A2B10G10R10, UINT): + return PixelFormat::A2B10G10R10_UINT; + case Hash(TextureFormat::A1B5G5R5, UNORM): + return PixelFormat::A1B5G5R5_UNORM; + case Hash(TextureFormat::A4B4G4R4, UNORM): + return PixelFormat::A4B4G4R4_UNORM; + case Hash(TextureFormat::R8, UNORM): + return PixelFormat::R8_UNORM; + case Hash(TextureFormat::R8, SNORM): + return PixelFormat::R8_SNORM; + case Hash(TextureFormat::R8, UINT): + return PixelFormat::R8_UINT; + case Hash(TextureFormat::R8, SINT): + return PixelFormat::R8_SINT; + case Hash(TextureFormat::R8G8, UNORM): + return PixelFormat::R8G8_UNORM; + case Hash(TextureFormat::R8G8, SNORM): + return PixelFormat::R8G8_SNORM; + case Hash(TextureFormat::R8G8, UINT): + return PixelFormat::R8G8_UINT; + case Hash(TextureFormat::R8G8, SINT): + return PixelFormat::R8G8_SINT; + case Hash(TextureFormat::R16G16B16A16, FLOAT): + return PixelFormat::R16G16B16A16_FLOAT; + case Hash(TextureFormat::R16G16B16A16, UNORM): + return PixelFormat::R16G16B16A16_UNORM; + case Hash(TextureFormat::R16G16B16A16, SNORM): + return PixelFormat::R16G16B16A16_SNORM; + case Hash(TextureFormat::R16G16B16A16, UINT): + return PixelFormat::R16G16B16A16_UINT; + case Hash(TextureFormat::R16G16B16A16, SINT): + return PixelFormat::R16G16B16A16_SINT; + case Hash(TextureFormat::R16G16, FLOAT): + return PixelFormat::R16G16_FLOAT; + case Hash(TextureFormat::R16G16, UNORM): + return PixelFormat::R16G16_UNORM; + case Hash(TextureFormat::R16G16, SNORM): + return PixelFormat::R16G16_SNORM; + case Hash(TextureFormat::R16G16, UINT): + return PixelFormat::R16G16_UINT; + case Hash(TextureFormat::R16G16, SINT): + return PixelFormat::R16G16_SINT; + case Hash(TextureFormat::R16, FLOAT): + return PixelFormat::R16_FLOAT; + 
case Hash(TextureFormat::R16, UNORM): + return PixelFormat::R16_UNORM; + case Hash(TextureFormat::R16, SNORM): + return PixelFormat::R16_SNORM; + case Hash(TextureFormat::R16, UINT): + return PixelFormat::R16_UINT; + case Hash(TextureFormat::R16, SINT): + return PixelFormat::R16_SINT; + case Hash(TextureFormat::B10G11R11, FLOAT): + return PixelFormat::B10G11R11_FLOAT; + case Hash(TextureFormat::R32G32B32A32, FLOAT): + return PixelFormat::R32G32B32A32_FLOAT; + case Hash(TextureFormat::R32G32B32A32, UINT): + return PixelFormat::R32G32B32A32_UINT; + case Hash(TextureFormat::R32G32B32A32, SINT): + return PixelFormat::R32G32B32A32_SINT; + case Hash(TextureFormat::R32G32B32, FLOAT): + return PixelFormat::R32G32B32_FLOAT; + case Hash(TextureFormat::R32G32, FLOAT): + return PixelFormat::R32G32_FLOAT; + case Hash(TextureFormat::R32G32, UINT): + return PixelFormat::R32G32_UINT; + case Hash(TextureFormat::R32G32, SINT): + return PixelFormat::R32G32_SINT; + case Hash(TextureFormat::R32, FLOAT): + return PixelFormat::R32_FLOAT; + case Hash(TextureFormat::R32, UINT): + return PixelFormat::R32_UINT; + case Hash(TextureFormat::R32, SINT): + return PixelFormat::R32_SINT; + case Hash(TextureFormat::E5B9G9R9, FLOAT): + return PixelFormat::E5B9G9R9_FLOAT; + case Hash(TextureFormat::D32, FLOAT): + return PixelFormat::D32_FLOAT; + case Hash(TextureFormat::D16, UNORM): + return PixelFormat::D16_UNORM; + case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR): + return PixelFormat::S8_UINT_D24_UNORM; + case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR): + return PixelFormat::S8_UINT_D24_UNORM; + case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): + return PixelFormat::D32_FLOAT_S8_UINT; + case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR): + return PixelFormat::BC1_RGBA_UNORM; + case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB): + return PixelFormat::BC1_RGBA_SRGB; + case Hash(TextureFormat::BC2, UNORM, LINEAR): + return PixelFormat::BC2_UNORM; + 
case Hash(TextureFormat::BC2, UNORM, SRGB): + return PixelFormat::BC2_SRGB; + case Hash(TextureFormat::BC3, UNORM, LINEAR): + return PixelFormat::BC3_UNORM; + case Hash(TextureFormat::BC3, UNORM, SRGB): + return PixelFormat::BC3_SRGB; + case Hash(TextureFormat::BC4, UNORM): + return PixelFormat::BC4_UNORM; + case Hash(TextureFormat::BC4, SNORM): + return PixelFormat::BC4_SNORM; + case Hash(TextureFormat::BC5, UNORM): + return PixelFormat::BC5_UNORM; + case Hash(TextureFormat::BC5, SNORM): + return PixelFormat::BC5_SNORM; + case Hash(TextureFormat::BC7, UNORM, LINEAR): + return PixelFormat::BC7_UNORM; + case Hash(TextureFormat::BC7, UNORM, SRGB): + return PixelFormat::BC7_SRGB; + case Hash(TextureFormat::BC6H_SFLOAT, FLOAT): + return PixelFormat::BC6H_SFLOAT; + case Hash(TextureFormat::BC6H_UFLOAT, FLOAT): + return PixelFormat::BC6H_UFLOAT; + case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR): + return PixelFormat::ASTC_2D_4X4_UNORM; + case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB): + return PixelFormat::ASTC_2D_4X4_SRGB; + case Hash(TextureFormat::ASTC_2D_5X4, UNORM, LINEAR): + return PixelFormat::ASTC_2D_5X4_UNORM; + case Hash(TextureFormat::ASTC_2D_5X4, UNORM, SRGB): + return PixelFormat::ASTC_2D_5X4_SRGB; + case Hash(TextureFormat::ASTC_2D_5X5, UNORM, LINEAR): + return PixelFormat::ASTC_2D_5X5_UNORM; + case Hash(TextureFormat::ASTC_2D_5X5, UNORM, SRGB): + return PixelFormat::ASTC_2D_5X5_SRGB; + case Hash(TextureFormat::ASTC_2D_8X8, UNORM, LINEAR): + return PixelFormat::ASTC_2D_8X8_UNORM; + case Hash(TextureFormat::ASTC_2D_8X8, UNORM, SRGB): + return PixelFormat::ASTC_2D_8X8_SRGB; + case Hash(TextureFormat::ASTC_2D_8X5, UNORM, LINEAR): + return PixelFormat::ASTC_2D_8X5_UNORM; + case Hash(TextureFormat::ASTC_2D_8X5, UNORM, SRGB): + return PixelFormat::ASTC_2D_8X5_SRGB; + case Hash(TextureFormat::ASTC_2D_10X8, UNORM, LINEAR): + return PixelFormat::ASTC_2D_10X8_UNORM; + case Hash(TextureFormat::ASTC_2D_10X8, UNORM, SRGB): + return 
PixelFormat::ASTC_2D_10X8_SRGB; + case Hash(TextureFormat::ASTC_2D_6X6, UNORM, LINEAR): + return PixelFormat::ASTC_2D_6X6_UNORM; + case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB): + return PixelFormat::ASTC_2D_6X6_SRGB; + case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR): + return PixelFormat::ASTC_2D_10X10_UNORM; + case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB): + return PixelFormat::ASTC_2D_10X10_SRGB; + case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR): + return PixelFormat::ASTC_2D_12X12_UNORM; + case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB): + return PixelFormat::ASTC_2D_12X12_SRGB; + case Hash(TextureFormat::ASTC_2D_8X6, UNORM, LINEAR): + return PixelFormat::ASTC_2D_8X6_UNORM; + case Hash(TextureFormat::ASTC_2D_8X6, UNORM, SRGB): + return PixelFormat::ASTC_2D_8X6_SRGB; + case Hash(TextureFormat::ASTC_2D_6X5, UNORM, LINEAR): + return PixelFormat::ASTC_2D_6X5_UNORM; + case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB): + return PixelFormat::ASTC_2D_6X5_SRGB; } UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}", - static_cast(format), is_srgb, static_cast(red_component), - static_cast(green_component), static_cast(blue_component), - static_cast(alpha_component)); + static_cast(format), is_srgb, static_cast(red), + static_cast(green), static_cast(blue), static_cast(alpha)); return PixelFormat::A8B8G8R8_UNORM; } -void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component, - ComponentType green_component, ComponentType blue_component, - ComponentType alpha_component, PixelFormat pixel_format) {} - -std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb, - ComponentType red_component, - ComponentType green_component, - ComponentType blue_component, - ComponentType alpha_component) noexcept { - const auto format_index = static_cast(format); - const auto red_index = static_cast(red_component); - const auto green_index = static_cast(green_component); - const auto 
blue_index = static_cast(blue_component); - const auto alpha_index = static_cast(alpha_component); - const std::size_t srgb_index = is_srgb ? 1 : 0; - - return format_index * PerFormat + - srgb_index * PerComponent * PerComponent * PerComponent * PerComponent + - alpha_index * PerComponent * PerComponent * PerComponent + - blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index; -} - } // namespace VideoCommon diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h index aa77e0a5a..729533999 100644 --- a/src/video_core/texture_cache/format_lookup_table.h +++ b/src/video_core/texture_cache/format_lookup_table.h @@ -4,48 +4,14 @@ #pragma once -#include -#include #include "video_core/surface.h" #include "video_core/textures/texture.h" namespace VideoCommon { -class FormatLookupTable { -public: - explicit FormatLookupTable(); - - VideoCore::Surface::PixelFormat GetPixelFormat( - Tegra::Texture::TextureFormat format, bool is_srgb, - Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component, - Tegra::Texture::ComponentType blue_component, - Tegra::Texture::ComponentType alpha_component) const noexcept; - -private: - static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits::max()); - - static constexpr std::size_t NumTextureFormats = 128; - - static constexpr std::size_t PerComponent = 8; - static constexpr std::size_t PerComponents2 = PerComponent * PerComponent; - static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent; - static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent; - static constexpr std::size_t PerFormat = PerComponents4 * 2; - - static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb, - Tegra::Texture::ComponentType red_component, - Tegra::Texture::ComponentType green_component, - Tegra::Texture::ComponentType blue_component, - 
Tegra::Texture::ComponentType alpha_component) noexcept; - - void Set(Tegra::Texture::TextureFormat format, bool is_srgb, - Tegra::Texture::ComponentType red_component, - Tegra::Texture::ComponentType green_component, - Tegra::Texture::ComponentType blue_component, - Tegra::Texture::ComponentType alpha_component, - VideoCore::Surface::PixelFormat pixel_format); - - std::array table; -}; +VideoCore::Surface::PixelFormat PixelFormatFromTextureInfo( + Tegra::Texture::TextureFormat format, Tegra::Texture::ComponentType red_component, + Tegra::Texture::ComponentType green_component, Tegra::Texture::ComponentType blue_component, + Tegra::Texture::ComponentType alpha_component, bool is_srgb) noexcept; } // namespace VideoCommon diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp new file mode 100644 index 000000000..d10ba4ccd --- /dev/null +++ b/src/video_core/texture_cache/formatter.cpp @@ -0,0 +1,95 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/render_targets.h" + +namespace VideoCommon { + +/// Builds a debug label for an image from its type, GPU address, extents and layer/mip counts. +std::string Name(const ImageBase& image) { + const GPUVAddr gpu_addr = image.gpu_addr; + const ImageInfo& info = image.info; + const u32 width = info.size.width; + const u32 height = info.size.height; + const u32 depth = info.size.depth; + const u32 num_layers = image.info.resources.layers; + const u32 num_levels = image.info.resources.levels; + std::string resource; + if (num_layers > 1) { + resource += fmt::format(":L{}", num_layers); + } + if (num_levels > 1) { + resource += fmt::format(":M{}", num_levels); + } + switch (image.info.type) { + case ImageType::e1D: + return fmt::format("Image 1D 0x{:x} {}{}", gpu_addr, width, resource); + case ImageType::e2D: + return fmt::format("Image 2D 0x{:x} {}x{}{}", gpu_addr, width, height, resource); + case ImageType::e3D: + return fmt::format("Image 3D 0x{:x} {}x{}x{}{}", gpu_addr, width, height, depth, resource); + case ImageType::Linear: + return fmt::format("Image Linear 0x{:x} {}x{}", gpu_addr, width, height); + case ImageType::Buffer: + return fmt::format("Buffer 0x{:x} {}", image.gpu_addr, image.info.size.width); + } + return "Invalid"; +} + +std::string Name(const ImageViewBase& image_view, std::optional type) { + const u32 width = image_view.size.width; + const u32 height = image_view.size.height; + const u32 depth = image_view.size.depth; + const u32 num_levels = image_view.range.extent.levels; + const u32 num_layers = image_view.range.extent.layers; + + const std::string level = num_levels > 1 ? 
fmt::format(":{}", num_levels) : ""; + switch (type.value_or(image_view.type)) { + case ImageViewType::e1D: + return fmt::format("ImageView 1D {}{}", width, level); + case ImageViewType::e2D: + return fmt::format("ImageView 2D {}x{}{}", width, height, level); + case ImageViewType::Cube: + return fmt::format("ImageView Cube {}x{}{}", width, height, level); + case ImageViewType::e3D: + return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level); + case ImageViewType::e1DArray: + return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers); + case ImageViewType::e2DArray: + return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers); + case ImageViewType::CubeArray: + return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers); + case ImageViewType::Rect: + return fmt::format("ImageView Rect {}x{}{}", width, height, level); + case ImageViewType::Buffer: + return fmt::format("BufferView {}", width); + } + return "Invalid"; +} + +std::string Name(const RenderTargets& render_targets) { + std::string_view debug_prefix; + const auto num_color = std::ranges::count_if( + render_targets.color_buffer_ids, [](ImageViewId id) { return static_cast(id); }); + if (render_targets.depth_buffer_id) { + debug_prefix = num_color > 0 ? "R" : "Z"; + } else { + debug_prefix = num_color > 0 ? 
"C" : "X"; + } + const Extent2D size = render_targets.size; + if (num_color > 0) { + return fmt::format("Framebuffer {}{} {}x{}", debug_prefix, num_color, size.width, + size.height); + } else { + return fmt::format("Framebuffer {} {}x{}", debug_prefix, size.width, size.height); + } +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h new file mode 100644 index 000000000..a48413983 --- /dev/null +++ b/src/video_core/texture_cache/formatter.h @@ -0,0 +1,263 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +#include "video_core/surface.h" +#include "video_core/texture_cache/types.h" + +template <> +struct fmt::formatter : fmt::formatter { + template + auto format(VideoCore::Surface::PixelFormat format, FormatContext& ctx) { + using VideoCore::Surface::PixelFormat; + const string_view name = [format] { + switch (format) { + case PixelFormat::A8B8G8R8_UNORM: + return "A8B8G8R8_UNORM"; + case PixelFormat::A8B8G8R8_SNORM: + return "A8B8G8R8_SNORM"; + case PixelFormat::A8B8G8R8_SINT: + return "A8B8G8R8_SINT"; + case PixelFormat::A8B8G8R8_UINT: + return "A8B8G8R8_UINT"; + case PixelFormat::R5G6B5_UNORM: + return "R5G6B5_UNORM"; + case PixelFormat::B5G6R5_UNORM: + return "B5G6R5_UNORM"; + case PixelFormat::A1R5G5B5_UNORM: + return "A1R5G5B5_UNORM"; + case PixelFormat::A2B10G10R10_UNORM: + return "A2B10G10R10_UNORM"; + case PixelFormat::A2B10G10R10_UINT: + return "A2B10G10R10_UINT"; + case PixelFormat::A1B5G5R5_UNORM: + return "A1B5G5R5_UNORM"; + case PixelFormat::R8_UNORM: + return "R8_UNORM"; + case PixelFormat::R8_SNORM: + return "R8_SNORM"; + case PixelFormat::R8_SINT: + return "R8_SINT"; + case PixelFormat::R8_UINT: + return "R8_UINT"; + case PixelFormat::R16G16B16A16_FLOAT: + return "R16G16B16A16_FLOAT"; + case PixelFormat::R16G16B16A16_UNORM: + return 
"R16G16B16A16_UNORM"; + case PixelFormat::R16G16B16A16_SNORM: + return "R16G16B16A16_SNORM"; + case PixelFormat::R16G16B16A16_SINT: + return "R16G16B16A16_SINT"; + case PixelFormat::R16G16B16A16_UINT: + return "R16G16B16A16_UINT"; + case PixelFormat::B10G11R11_FLOAT: + return "B10G11R11_FLOAT"; + case PixelFormat::R32G32B32A32_UINT: + return "R32G32B32A32_UINT"; + case PixelFormat::BC1_RGBA_UNORM: + return "BC1_RGBA_UNORM"; + case PixelFormat::BC2_UNORM: + return "BC2_UNORM"; + case PixelFormat::BC3_UNORM: + return "BC3_UNORM"; + case PixelFormat::BC4_UNORM: + return "BC4_UNORM"; + case PixelFormat::BC4_SNORM: + return "BC4_SNORM"; + case PixelFormat::BC5_UNORM: + return "BC5_UNORM"; + case PixelFormat::BC5_SNORM: + return "BC5_SNORM"; + case PixelFormat::BC7_UNORM: + return "BC7_UNORM"; + case PixelFormat::BC6H_UFLOAT: + return "BC6H_UFLOAT"; + case PixelFormat::BC6H_SFLOAT: + return "BC6H_SFLOAT"; + case PixelFormat::ASTC_2D_4X4_UNORM: + return "ASTC_2D_4X4_UNORM"; + case PixelFormat::B8G8R8A8_UNORM: + return "B8G8R8A8_UNORM"; + case PixelFormat::R32G32B32A32_FLOAT: + return "R32G32B32A32_FLOAT"; + case PixelFormat::R32G32B32A32_SINT: + return "R32G32B32A32_SINT"; + case PixelFormat::R32G32_FLOAT: + return "R32G32_FLOAT"; + case PixelFormat::R32G32_SINT: + return "R32G32_SINT"; + case PixelFormat::R32_FLOAT: + return "R32_FLOAT"; + case PixelFormat::R16_FLOAT: + return "R16_FLOAT"; + case PixelFormat::R16_UNORM: + return "R16_UNORM"; + case PixelFormat::R16_SNORM: + return "R16_SNORM"; + case PixelFormat::R16_UINT: + return "R16_UINT"; + case PixelFormat::R16_SINT: + return "R16_SINT"; + case PixelFormat::R16G16_UNORM: + return "R16G16_UNORM"; + case PixelFormat::R16G16_FLOAT: + return "R16G16_FLOAT"; + case PixelFormat::R16G16_UINT: + return "R16G16_UINT"; + case PixelFormat::R16G16_SINT: + return "R16G16_SINT"; + case PixelFormat::R16G16_SNORM: + return "R16G16_SNORM"; + case PixelFormat::R32G32B32_FLOAT: + return "R32G32B32_FLOAT"; + case 
PixelFormat::A8B8G8R8_SRGB: + return "A8B8G8R8_SRGB"; + case PixelFormat::R8G8_UNORM: + return "R8G8_UNORM"; + case PixelFormat::R8G8_SNORM: + return "R8G8_SNORM"; + case PixelFormat::R8G8_SINT: + return "R8G8_SINT"; + case PixelFormat::R8G8_UINT: + return "R8G8_UINT"; + case PixelFormat::R32G32_UINT: + return "R32G32_UINT"; + case PixelFormat::R16G16B16X16_FLOAT: + return "R16G16B16X16_FLOAT"; + case PixelFormat::R32_UINT: + return "R32_UINT"; + case PixelFormat::R32_SINT: + return "R32_SINT"; + case PixelFormat::ASTC_2D_8X8_UNORM: + return "ASTC_2D_8X8_UNORM"; + case PixelFormat::ASTC_2D_8X5_UNORM: + return "ASTC_2D_8X5_UNORM"; + case PixelFormat::ASTC_2D_5X4_UNORM: + return "ASTC_2D_5X4_UNORM"; + case PixelFormat::B8G8R8A8_SRGB: + return "B8G8R8A8_SRGB"; + case PixelFormat::BC1_RGBA_SRGB: + return "BC1_RGBA_SRGB"; + case PixelFormat::BC2_SRGB: + return "BC2_SRGB"; + case PixelFormat::BC3_SRGB: + return "BC3_SRGB"; + case PixelFormat::BC7_SRGB: + return "BC7_SRGB"; + case PixelFormat::A4B4G4R4_UNORM: + return "A4B4G4R4_UNORM"; + case PixelFormat::ASTC_2D_4X4_SRGB: + return "ASTC_2D_4X4_SRGB"; + case PixelFormat::ASTC_2D_8X8_SRGB: + return "ASTC_2D_8X8_SRGB"; + case PixelFormat::ASTC_2D_8X5_SRGB: + return "ASTC_2D_8X5_SRGB"; + case PixelFormat::ASTC_2D_5X4_SRGB: + return "ASTC_2D_5X4_SRGB"; + case PixelFormat::ASTC_2D_5X5_UNORM: + return "ASTC_2D_5X5_UNORM"; + case PixelFormat::ASTC_2D_5X5_SRGB: + return "ASTC_2D_5X5_SRGB"; + case PixelFormat::ASTC_2D_10X8_UNORM: + return "ASTC_2D_10X8_UNORM"; + case PixelFormat::ASTC_2D_10X8_SRGB: + return "ASTC_2D_10X8_SRGB"; + case PixelFormat::ASTC_2D_6X6_UNORM: + return "ASTC_2D_6X6_UNORM"; + case PixelFormat::ASTC_2D_6X6_SRGB: + return "ASTC_2D_6X6_SRGB"; + case PixelFormat::ASTC_2D_10X10_UNORM: + return "ASTC_2D_10X10_UNORM"; + case PixelFormat::ASTC_2D_10X10_SRGB: + return "ASTC_2D_10X10_SRGB"; + case PixelFormat::ASTC_2D_12X12_UNORM: + return "ASTC_2D_12X12_UNORM"; + case PixelFormat::ASTC_2D_12X12_SRGB: + return 
"ASTC_2D_12X12_SRGB"; + case PixelFormat::ASTC_2D_8X6_UNORM: + return "ASTC_2D_8X6_UNORM"; + case PixelFormat::ASTC_2D_8X6_SRGB: + return "ASTC_2D_8X6_SRGB"; + case PixelFormat::ASTC_2D_6X5_UNORM: + return "ASTC_2D_6X5_UNORM"; + case PixelFormat::ASTC_2D_6X5_SRGB: + return "ASTC_2D_6X5_SRGB"; + case PixelFormat::E5B9G9R9_FLOAT: + return "E5B9G9R9_FLOAT"; + case PixelFormat::D32_FLOAT: + return "D32_FLOAT"; + case PixelFormat::D16_UNORM: + return "D16_UNORM"; + case PixelFormat::D24_UNORM_S8_UINT: + return "D24_UNORM_S8_UINT"; + case PixelFormat::S8_UINT_D24_UNORM: + return "S8_UINT_D24_UNORM"; + case PixelFormat::D32_FLOAT_S8_UINT: + return "D32_FLOAT_S8_UINT"; + case PixelFormat::MaxDepthStencilFormat: + case PixelFormat::Invalid: + return "Invalid"; + } + return "Invalid"; + }(); + return formatter::format(name, ctx); + } +}; + +template <> +struct fmt::formatter : fmt::formatter { + template + auto format(VideoCommon::ImageType type, FormatContext& ctx) { + const string_view name = [type] { + using VideoCommon::ImageType; + switch (type) { + case ImageType::e1D: + return "1D"; + case ImageType::e2D: + return "2D"; + case ImageType::e3D: + return "3D"; + case ImageType::Linear: + return "Linear"; + case ImageType::Buffer: + return "Buffer"; + } + return "Invalid"; + }(); + return formatter::format(name, ctx); + } +}; + +template <> +struct fmt::formatter { + constexpr auto parse(fmt::format_parse_context& ctx) { + return ctx.begin(); + } + + template + auto format(const VideoCommon::Extent3D& extent, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{{{}, {}, {}}}", extent.width, extent.height, + extent.depth); + } +}; + +namespace VideoCommon { + +struct ImageBase; +struct ImageViewBase; +struct RenderTargets; + +[[nodiscard]] std::string Name(const ImageBase& image); + +[[nodiscard]] std::string Name(const ImageViewBase& image_view, + std::optional type = std::nullopt); + +[[nodiscard]] std::string Name(const RenderTargets& render_targets); + +} // 
namespace VideoCommon diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp new file mode 100644 index 000000000..448a05fcc --- /dev/null +++ b/src/video_core/texture_cache/image_base.cpp @@ -0,0 +1,216 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include + +#include "common/common_types.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/util.h" + +namespace VideoCommon { + +using VideoCore::Surface::DefaultBlockHeight; +using VideoCore::Surface::DefaultBlockWidth; + +namespace { +/// Returns the base layer and mip level offset +[[nodiscard]] std::pair LayerMipOffset(s32 diff, u32 layer_stride) { + if (layer_stride == 0) { + return {0, diff}; + } else { + return {diff / layer_stride, diff % layer_stride}; + } +} + +[[nodiscard]] bool ValidateLayers(const SubresourceLayers& layers, const ImageInfo& info) { + return layers.base_level < info.resources.levels && + layers.base_layer + layers.num_layers <= info.resources.layers; +} + +[[nodiscard]] bool ValidateCopy(const ImageCopy& copy, const ImageInfo& dst, const ImageInfo& src) { + const Extent3D src_size = MipSize(src.size, copy.src_subresource.base_level); + const Extent3D dst_size = MipSize(dst.size, copy.dst_subresource.base_level); + if (!ValidateLayers(copy.src_subresource, src)) { + return false; + } + if (!ValidateLayers(copy.dst_subresource, dst)) { + return false; + } + if (copy.src_offset.x + copy.extent.width > src_size.width || + copy.src_offset.y + copy.extent.height > src_size.height || + copy.src_offset.z + copy.extent.depth > src_size.depth) { + return false; + } + if (copy.dst_offset.x + copy.extent.width > dst_size.width || + copy.dst_offset.y + 
copy.extent.height > dst_size.height || + copy.dst_offset.z + copy.extent.depth > dst_size.depth) { + return false; + } + return true; +} +} // Anonymous namespace + +ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) + : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, + unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, + converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_}, + cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes}, + mip_level_offsets{CalculateMipLevelOffsets(info)} { + if (info.type == ImageType::e3D) { + slice_offsets = CalculateSliceOffsets(info); + slice_subresources = CalculateSliceSubresources(info); + } +} + +std::optional ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { + if (other_addr < gpu_addr) { + // Subresource address can't be lower than the base + return std::nullopt; + } + const u32 diff = static_cast(other_addr - gpu_addr); + if (diff > guest_size_bytes) { + // This can happen when two CPU addresses are used for different GPU addresses + return std::nullopt; + } + if (info.type != ImageType::e3D) { + const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride); + const auto end = mip_level_offsets.begin() + info.resources.levels; + const auto it = std::find(mip_level_offsets.begin(), end, mip_offset); + if (layer > info.resources.layers || it == end) { + return std::nullopt; + } + return SubresourceBase{ + .level = static_cast(std::distance(mip_level_offsets.begin(), it)), + .layer = layer, + }; + } else { + // TODO: Consider using binary_search after a threshold + const auto it = std::ranges::find(slice_offsets, diff); + if (it == slice_offsets.cend()) { + return std::nullopt; + } + return slice_subresources[std::distance(slice_offsets.begin(), it)]; + } +} + +ImageViewId ImageBase::FindView(const ImageViewInfo& view_info) const noexcept { + const auto it = std::ranges::find(image_view_infos, view_info); + if (it == 
image_view_infos.end()) { + return ImageViewId{}; + } + return image_view_ids[std::distance(image_view_infos.begin(), it)]; +} + +void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id) { + image_view_infos.push_back(view_info); + image_view_ids.push_back(image_view_id); +} + +void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { + static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; + ASSERT(lhs.info.type == rhs.info.type); + std::optional base; + if (lhs.info.type == ImageType::Linear) { + base = SubresourceBase{.level = 0, .layer = 0}; + } else { + base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS); + } + if (!base) { + LOG_ERROR(HW_GPU, "Image alias should have been flipped"); + return; + } + const PixelFormat lhs_format = lhs.info.format; + const PixelFormat rhs_format = rhs.info.format; + const Extent2D lhs_block{ + .width = DefaultBlockWidth(lhs_format), + .height = DefaultBlockHeight(lhs_format), + }; + const Extent2D rhs_block{ + .width = DefaultBlockWidth(rhs_format), + .height = DefaultBlockHeight(rhs_format), + }; + const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1; + const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1; + if (is_lhs_compressed && is_rhs_compressed) { + LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented"); + return; + } + const s32 lhs_mips = lhs.info.resources.levels; + const s32 rhs_mips = rhs.info.resources.levels; + const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips); + AliasedImage lhs_alias; + AliasedImage rhs_alias; + lhs_alias.id = rhs_id; + rhs_alias.id = lhs_id; + lhs_alias.copies.reserve(num_mips); + rhs_alias.copies.reserve(num_mips); + for (s32 mip_level = 0; mip_level < num_mips; ++mip_level) { + Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level); + Extent3D rhs_size = MipSize(rhs.info.size, mip_level); + if 
(is_lhs_compressed) { + lhs_size.width /= lhs_block.width; + lhs_size.height /= lhs_block.height; + } + if (is_rhs_compressed) { + rhs_size.width /= rhs_block.width; + rhs_size.height /= rhs_block.height; + } + const Extent3D copy_size{ + .width = std::min(lhs_size.width, rhs_size.width), + .height = std::min(lhs_size.height, rhs_size.height), + .depth = std::min(lhs_size.depth, rhs_size.depth), + }; + if (copy_size.width == 0 || copy_size.height == 0) { + LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased."); + continue; + } + const bool is_lhs_3d = lhs.info.type == ImageType::e3D; + const bool is_rhs_3d = rhs.info.type == ImageType::e3D; + const Offset3D lhs_offset{0, 0, 0}; + const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0}; + const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer; + const s32 rhs_layers = is_rhs_3d ? 1 : rhs.info.resources.layers; + const s32 num_layers = std::min(lhs_layers, rhs_layers); + const SubresourceLayers lhs_subresource{ + .base_level = mip_level, + .base_layer = 0, + .num_layers = num_layers, + }; + const SubresourceLayers rhs_subresource{ + .base_level = base->level + mip_level, + .base_layer = is_rhs_3d ? 
0 : base->layer, + .num_layers = num_layers, + }; + [[maybe_unused]] const ImageCopy& to_lhs_copy = lhs_alias.copies.emplace_back(ImageCopy{ + .src_subresource = lhs_subresource, + .dst_subresource = rhs_subresource, + .src_offset = lhs_offset, + .dst_offset = rhs_offset, + .extent = copy_size, + }); + [[maybe_unused]] const ImageCopy& to_rhs_copy = rhs_alias.copies.emplace_back(ImageCopy{ + .src_subresource = rhs_subresource, + .dst_subresource = lhs_subresource, + .src_offset = rhs_offset, + .dst_offset = lhs_offset, + .extent = copy_size, + }); + ASSERT_MSG(ValidateCopy(to_lhs_copy, lhs.info, rhs.info), "Invalid RHS to LHS copy"); + ASSERT_MSG(ValidateCopy(to_rhs_copy, rhs.info, lhs.info), "Invalid LHS to RHS copy"); + } + ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty()); + if (lhs_alias.copies.empty()) { + return; + } + lhs.aliased_images.push_back(std::move(lhs_alias)); + rhs.aliased_images.push_back(std::move(rhs_alias)); +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h new file mode 100644 index 000000000..b7f3b7e43 --- /dev/null +++ b/src/video_core/texture_cache/image_base.h @@ -0,0 +1,83 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include + +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +enum class ImageFlagBits : u32 { + AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU + Converted = 1 << 1, ///< Guest format is not supported natively and it has to be converted + CpuModified = 1 << 2, ///< Contents have been modified from the CPU + GpuModified = 1 << 3, ///< Contents have been modified from the GPU + Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT + Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted + Registered = 1 << 6, ///< True when the image is registered + Picked = 1 << 7, ///< Temporary flag to mark the image as picked +}; +DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) + +struct ImageViewInfo; + +struct AliasedImage { + std::vector copies; + ImageId id; +}; + +struct ImageBase { + explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + + [[nodiscard]] std::optional TryFindBase(GPUVAddr other_addr) const noexcept; + + [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept; + + void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); + + [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { + const VAddr overlap_end = overlap_cpu_addr + overlap_size; + return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; + } + + ImageInfo info; + + u32 guest_size_bytes = 0; + u32 unswizzled_size_bytes = 0; + u32 converted_size_bytes = 0; + ImageFlagBits flags = ImageFlagBits::CpuModified; + + GPUVAddr gpu_addr = 0; + VAddr cpu_addr = 0; + VAddr cpu_addr_end = 0; + + u64 modification_tick = 0; + u64 frame_tick = 0; + + std::array mip_level_offsets{}; + + std::vector 
image_view_infos; + std::vector image_view_ids; + + std::vector slice_offsets; + std::vector slice_subresources; + + std::vector aliased_images; +}; + +struct ImageAllocBase { + std::vector images; +}; + +void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp new file mode 100644 index 000000000..64fd7010a --- /dev/null +++ b/src/video_core/texture_cache/image_info.cpp @@ -0,0 +1,189 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/samples_helper.h" +#include "video_core/texture_cache/types.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using VideoCore::Surface::PixelFormat; + +ImageInfo::ImageInfo(const TICEntry& config) noexcept { + format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, + config.a_type, config.srgb_conversion); + num_samples = NumSamples(config.msaa_mode); + resources.levels = config.max_mip_level + 1; + if (config.IsPitchLinear()) { + pitch = config.Pitch(); + } else if (config.IsBlockLinear()) { + block = Extent3D{ + .width = config.block_width, + .height = config.block_height, + .depth = config.block_depth, + }; + } + tile_width_spacing = config.tile_width_spacing; + if (config.texture_type != TextureType::Texture2D && + config.texture_type != TextureType::Texture2DNoMipmap) { + ASSERT(!config.IsPitchLinear()); + } + switch (config.texture_type) { + case TextureType::Texture1D: + ASSERT(config.BaseLayer() == 0); + type 
= ImageType::e1D; + size.width = config.Width(); + break; + case TextureType::Texture1DArray: + UNIMPLEMENTED_IF(config.BaseLayer() != 0); + type = ImageType::e1D; + size.width = config.Width(); + resources.layers = config.Depth(); + break; + case TextureType::Texture2D: + case TextureType::Texture2DNoMipmap: + ASSERT(config.Depth() == 1); + type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D; + size.width = config.Width(); + size.height = config.Height(); + resources.layers = config.BaseLayer() + 1; + break; + case TextureType::Texture2DArray: + type = ImageType::e2D; + size.width = config.Width(); + size.height = config.Height(); + resources.layers = config.BaseLayer() + config.Depth(); + break; + case TextureType::TextureCubemap: + ASSERT(config.Depth() == 1); + type = ImageType::e2D; + size.width = config.Width(); + size.height = config.Height(); + resources.layers = config.BaseLayer() + 6; + break; + case TextureType::TextureCubeArray: + UNIMPLEMENTED_IF(config.load_store_hint != 0); + type = ImageType::e2D; + size.width = config.Width(); + size.height = config.Height(); + resources.layers = config.BaseLayer() + config.Depth() * 6; + break; + case TextureType::Texture3D: + ASSERT(config.BaseLayer() == 0); + type = ImageType::e3D; + size.width = config.Width(); + size.height = config.Height(); + size.depth = config.Depth(); + break; + case TextureType::Texture1DBuffer: + type = ImageType::Buffer; + size.width = config.Width(); + break; + default: + UNREACHABLE_MSG("Invalid texture_type={}", static_cast(config.texture_type.Value())); + break; + } + if (type != ImageType::Linear) { + // FIXME: Call this without passing *this + layer_stride = CalculateLayerStride(*this); + maybe_unaligned_layer_stride = CalculateLayerSize(*this); + } +} + +ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept { + const auto& rt = regs.rt[index]; + format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format); + if 
(rt.tile_mode.is_pitch_linear) { + ASSERT(rt.tile_mode.is_3d == 0); + type = ImageType::Linear; + pitch = rt.width; + size = Extent3D{ + .width = pitch / BytesPerBlock(format), + .height = rt.height, + .depth = 1, + }; + return; + } + size.width = rt.width; + size.height = rt.height; + layer_stride = rt.layer_stride * 4; + maybe_unaligned_layer_stride = layer_stride; + num_samples = NumSamples(regs.multisample_mode); + block = Extent3D{ + .width = rt.tile_mode.block_width, + .height = rt.tile_mode.block_height, + .depth = rt.tile_mode.block_depth, + }; + if (rt.tile_mode.is_3d) { + type = ImageType::e3D; + size.depth = rt.depth; + } else { + type = ImageType::e2D; + resources.layers = rt.depth; + } +} + +ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { + format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format); + size.width = regs.zeta_width; + size.height = regs.zeta_height; + resources.levels = 1; + layer_stride = regs.zeta.layer_stride * 4; + maybe_unaligned_layer_stride = layer_stride; + num_samples = NumSamples(regs.multisample_mode); + block = Extent3D{ + .width = regs.zeta.tile_mode.block_width, + .height = regs.zeta.tile_mode.block_height, + .depth = regs.zeta.tile_mode.block_depth, + }; + if (regs.zeta.tile_mode.is_pitch_linear) { + ASSERT(regs.zeta.tile_mode.is_3d == 0); + type = ImageType::Linear; + pitch = size.width * BytesPerBlock(format); + } else if (regs.zeta.tile_mode.is_3d) { + ASSERT(regs.zeta.tile_mode.is_pitch_linear == 0); + type = ImageType::e3D; + size.depth = regs.zeta_depth; + } else { + type = ImageType::e2D; + resources.layers = regs.zeta_depth; + } +} + +ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { + UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero"); + format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format); + if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) { + type = ImageType::Linear; + size = 
Extent3D{ + .width = config.pitch / VideoCore::Surface::BytesPerBlock(format), + .height = config.height, + .depth = 1, + }; + pitch = config.pitch; + } else { + type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D; + block = Extent3D{ + .width = config.block_width, + .height = config.block_height, + .depth = config.block_depth, + }; + // 3D blits with more than once slice are not implemented for now + // Render to individual slices + size = Extent3D{ + .width = config.width, + .height = config.height, + .depth = 1, + }; + } +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h new file mode 100644 index 000000000..5049fc36e --- /dev/null +++ b/src/video_core/texture_cache/image_info.h @@ -0,0 +1,38 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/engines/fermi_2d.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +using Tegra::Texture::TICEntry; +using VideoCore::Surface::PixelFormat; + +struct ImageInfo { + explicit ImageInfo() = default; + explicit ImageInfo(const TICEntry& config) noexcept; + explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept; + explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept; + explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept; + + PixelFormat format = PixelFormat::Invalid; + ImageType type = ImageType::e1D; + SubresourceExtent resources; + Extent3D size{1, 1, 1}; + union { + Extent3D block{0, 0, 0}; + u32 pitch; + }; + u32 layer_stride = 0; + u32 maybe_unaligned_layer_stride = 0; + u32 num_samples = 1; + u32 tile_width_spacing = 0; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.cpp 
b/src/video_core/texture_cache/image_view_base.cpp new file mode 100644 index 000000000..076a4bcfd --- /dev/null +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -0,0 +1,41 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/assert.h" +#include "core/settings.h" +#include "video_core/compatible_formats.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, + ImageId image_id_) + : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range}, + size{ + .width = std::max(image_info.size.width >> range.base.level, 1u), + .height = std::max(image_info.size.height >> range.base.level, 1u), + .depth = std::max(image_info.size.depth >> range.base.level, 1u), + } { + ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format), + "Image view format {} is incompatible with image format {}", info.format, + image_info.format); + const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); + if (image_info.type == ImageType::Linear && is_async) { + flags |= ImageViewFlagBits::PreemtiveDownload; + } + if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) { + flags |= ImageViewFlagBits::Slice; + } +} + +ImageViewBase::ImageViewBase(const NullImageParams&) {} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h new file mode 100644 index 000000000..73954167e --- /dev/null +++ b/src/video_core/texture_cache/image_view_base.h @@ -0,0 +1,47 @@ +// Copyright 
2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_funcs.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +using VideoCore::Surface::PixelFormat; + +struct ImageViewInfo; +struct ImageInfo; + +struct NullImageParams {}; + +enum class ImageViewFlagBits : u16 { + PreemtiveDownload = 1 << 0, + Strong = 1 << 1, + Slice = 1 << 2, +}; +DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) + +struct ImageViewBase { + explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, + ImageId image_id); + explicit ImageViewBase(const NullImageParams&); + + [[nodiscard]] bool IsBuffer() const noexcept { + return type == ImageViewType::Buffer; + } + + ImageId image_id{}; + PixelFormat format{}; + ImageViewType type{}; + SubresourceRange range; + Extent3D size{0, 0, 0}; + ImageViewFlagBits flags{}; + + u64 invalidation_tick = 0; + u64 modification_tick = 0; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp new file mode 100644 index 000000000..faf5b151f --- /dev/null +++ b/src/video_core/texture_cache/image_view_info.cpp @@ -0,0 +1,88 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/assert.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +namespace { + +constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits::max(); + +[[nodiscard]] u8 CastSwizzle(SwizzleSource source) { + const u8 casted = static_cast(source); + ASSERT(static_cast(casted) == source); + return casted; +} + +} // Anonymous namespace + +ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept + : format{PixelFormatFromTIC(config)}, x_source{CastSwizzle(config.x_source)}, + y_source{CastSwizzle(config.y_source)}, z_source{CastSwizzle(config.z_source)}, + w_source{CastSwizzle(config.w_source)} { + range.base = SubresourceBase{ + .level = static_cast(config.res_min_mip_level), + .layer = base_layer, + }; + range.extent.levels = config.res_max_mip_level - config.res_min_mip_level + 1; + + switch (config.texture_type) { + case TextureType::Texture1D: + ASSERT(config.Height() == 1); + ASSERT(config.Depth() == 1); + type = ImageViewType::e1D; + break; + case TextureType::Texture2D: + case TextureType::Texture2DNoMipmap: + ASSERT(config.Depth() == 1); + type = config.normalized_coords ? 
ImageViewType::e2D : ImageViewType::Rect; + break; + case TextureType::Texture3D: + type = ImageViewType::e3D; + break; + case TextureType::TextureCubemap: + ASSERT(config.Depth() == 1); + type = ImageViewType::Cube; + range.extent.layers = 6; + break; + case TextureType::Texture1DArray: + type = ImageViewType::e1DArray; + range.extent.layers = config.Depth(); + break; + case TextureType::Texture2DArray: + type = ImageViewType::e2DArray; + range.extent.layers = config.Depth(); + break; + case TextureType::Texture1DBuffer: + type = ImageViewType::Buffer; + break; + case TextureType::TextureCubeArray: + type = ImageViewType::CubeArray; + range.extent.layers = config.Depth() * 6; + break; + default: + UNREACHABLE_MSG("Invalid texture_type={}", static_cast(config.texture_type.Value())); + break; + } +} + +ImageViewInfo::ImageViewInfo(ImageViewType type_, PixelFormat format_, + SubresourceRange range_) noexcept + : type{type_}, format{format_}, range{range_}, x_source{RENDER_TARGET_SWIZZLE}, + y_source{RENDER_TARGET_SWIZZLE}, z_source{RENDER_TARGET_SWIZZLE}, + w_source{RENDER_TARGET_SWIZZLE} {} + +bool ImageViewInfo::IsRenderTarget() const noexcept { + return x_source == RENDER_TARGET_SWIZZLE && y_source == RENDER_TARGET_SWIZZLE && + z_source == RENDER_TARGET_SWIZZLE && w_source == RENDER_TARGET_SWIZZLE; +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h new file mode 100644 index 000000000..0c1f99117 --- /dev/null +++ b/src/video_core/texture_cache/image_view_info.h @@ -0,0 +1,50 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include + +#include "video_core/surface.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TICEntry; +using VideoCore::Surface::PixelFormat; + +/// Properties used to determine a image view +struct ImageViewInfo { + explicit ImageViewInfo() noexcept = default; + explicit ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept; + explicit ImageViewInfo(ImageViewType type, PixelFormat format, + SubresourceRange range = {}) noexcept; + + auto operator<=>(const ImageViewInfo&) const noexcept = default; + + [[nodiscard]] bool IsRenderTarget() const noexcept; + + [[nodiscard]] std::array Swizzle() const noexcept { + return std::array{ + static_cast(x_source), + static_cast(y_source), + static_cast(z_source), + static_cast(w_source), + }; + } + + ImageViewType type{}; + PixelFormat format{}; + SubresourceRange range; + u8 x_source = static_cast(SwizzleSource::R); + u8 y_source = static_cast(SwizzleSource::G); + u8 z_source = static_cast(SwizzleSource::B); + u8 w_source = static_cast(SwizzleSource::A); +}; +static_assert(std::has_unique_object_representations_v); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h new file mode 100644 index 000000000..9b9544b07 --- /dev/null +++ b/src/video_core/texture_cache/render_targets.h @@ -0,0 +1,51 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include + +#include "common/bit_cast.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +/// Framebuffer properties used to lookup a framebuffer +struct RenderTargets { + constexpr auto operator<=>(const RenderTargets&) const noexcept = default; + + constexpr bool Contains(std::span elements) const noexcept { + const auto contains = [elements](ImageViewId item) { + return std::ranges::find(elements, item) != elements.end(); + }; + return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id); + } + + std::array color_buffer_ids; + ImageViewId depth_buffer_id; + std::array draw_buffers{}; + Extent2D size; +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash { + size_t operator()(const VideoCommon::RenderTargets& rt) const noexcept { + using VideoCommon::ImageViewId; + size_t value = std::hash{}(rt.depth_buffer_id); + for (const ImageViewId color_buffer_id : rt.color_buffer_ids) { + value ^= std::hash{}(color_buffer_id); + } + value ^= Common::BitCast(rt.draw_buffers); + value ^= Common::BitCast(rt.size); + return value; + } +}; + +} // namespace std diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h new file mode 100644 index 000000000..04539a43c --- /dev/null +++ b/src/video_core/texture_cache/samples_helper.h @@ -0,0 +1,55 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "common/assert.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +[[nodiscard]] inline std::pair SamplesLog2(int num_samples) { + switch (num_samples) { + case 1: + return {0, 0}; + case 2: + return {1, 0}; + case 4: + return {1, 1}; + case 8: + return {2, 1}; + case 16: + return {2, 2}; + } + UNREACHABLE_MSG("Invalid number of samples={}", num_samples); + return {1, 1}; +} + +[[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) { + using Tegra::Texture::MsaaMode; + switch (msaa_mode) { + case MsaaMode::Msaa1x1: + return 1; + case MsaaMode::Msaa2x1: + case MsaaMode::Msaa2x1_D3D: + return 2; + case MsaaMode::Msaa2x2: + case MsaaMode::Msaa2x2_VC4: + case MsaaMode::Msaa2x2_VC12: + return 4; + case MsaaMode::Msaa4x2: + case MsaaMode::Msaa4x2_D3D: + case MsaaMode::Msaa4x2_VC8: + case MsaaMode::Msaa4x2_VC24: + return 8; + case MsaaMode::Msaa4x4: + return 16; + } + UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast(msaa_mode)); + return 1; +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h new file mode 100644 index 000000000..eae3be6ea --- /dev/null +++ b/src/video_core/texture_cache/slot_vector.h @@ -0,0 +1,156 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" + +namespace VideoCommon { + +struct SlotId { + static constexpr u32 INVALID_INDEX = std::numeric_limits::max(); + + constexpr auto operator<=>(const SlotId&) const noexcept = default; + + constexpr explicit operator bool() const noexcept { + return index != INVALID_INDEX; + } + + u32 index = INVALID_INDEX; +}; + +template +requires std::is_nothrow_move_assignable_v&& + std::is_nothrow_move_constructible_v class SlotVector { +public: + ~SlotVector() noexcept { + size_t index = 0; + for (u64 bits : stored_bitset) { + for (size_t bit = 0; bits; ++bit, bits >>= 1) { + if ((bits & 1) != 0) { + values[index + bit].object.~T(); + } + } + index += 64; + } + delete[] values; + } + + [[nodiscard]] T& operator[](SlotId id) noexcept { + ValidateIndex(id); + return values[id.index].object; + } + + [[nodiscard]] const T& operator[](SlotId id) const noexcept { + ValidateIndex(id); + return values[id.index].object; + } + + template + [[nodiscard]] SlotId insert(Args&&... 
args) noexcept { + const u32 index = FreeValueIndex(); + new (&values[index].object) T(std::forward(args)...); + SetStorageBit(index); + + return SlotId{index}; + } + + void erase(SlotId id) noexcept { + values[id.index].object.~T(); + free_list.push_back(id.index); + ResetStorageBit(id.index); + } + +private: + struct NonTrivialDummy { + NonTrivialDummy() noexcept {} + }; + + union Entry { + Entry() noexcept : dummy{} {} + ~Entry() noexcept {} + + NonTrivialDummy dummy; + T object; + }; + + void SetStorageBit(u32 index) noexcept { + stored_bitset[index / 64] |= u64(1) << (index % 64); + } + + void ResetStorageBit(u32 index) noexcept { + stored_bitset[index / 64] &= ~(u64(1) << (index % 64)); + } + + bool ReadStorageBit(u32 index) noexcept { + return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0; + } + + void ValidateIndex(SlotId id) const noexcept { + DEBUG_ASSERT(id); + DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); + DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); + } + + [[nodiscard]] u32 FreeValueIndex() noexcept { + if (free_list.empty()) { + Reserve(values_capacity ? 
(values_capacity << 1) : 1); + } + const u32 free_index = free_list.back(); + free_list.pop_back(); + return free_index; + } + + void Reserve(size_t new_capacity) noexcept { + Entry* const new_values = new Entry[new_capacity]; + size_t index = 0; + for (u64 bits : stored_bitset) { + for (size_t bit = 0; bits; ++bit, bits >>= 1) { + const size_t i = index + bit; + if ((bits & 1) == 0) { + continue; + } + T& old_value = values[i].object; + new (&new_values[i].object) T(std::move(old_value)); + old_value.~T(); + } + index += 64; + } + + stored_bitset.resize((new_capacity + 63) / 64); + + const size_t old_free_size = free_list.size(); + free_list.resize(old_free_size + (new_capacity - values_capacity)); + std::iota(free_list.begin() + old_free_size, free_list.end(), + static_cast(values_capacity)); + + delete[] values; + values = new_values; + values_capacity = new_capacity; + } + + Entry* values = nullptr; + size_t values_capacity = 0; + size_t values_size = 0; + + std::vector stored_bitset; + std::vector free_list; +}; + +} // namespace VideoCommon + +template <> +struct std::hash { + size_t operator()(const VideoCommon::SlotId& id) const noexcept { + return std::hash{}(id.index); + } +}; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp deleted file mode 100644 index efbcf6723..000000000 --- a/src/video_core/texture_cache/surface_base.cpp +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/algorithm.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/microprofile.h" -#include "video_core/memory_manager.h" -#include "video_core/texture_cache/surface_base.h" -#include "video_core/texture_cache/surface_params.h" -#include "video_core/textures/convert.h" - -namespace VideoCommon { - -MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128)); - -using Tegra::Texture::ConvertFromGuestToHost; -using VideoCore::MortonSwizzleMode; -using VideoCore::Surface::IsPixelFormatASTC; -using VideoCore::Surface::PixelFormat; - -StagingCache::StagingCache() = default; - -StagingCache::~StagingCache() = default; - -SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_, - bool is_astc_supported_) - : params{params_}, gpu_addr{gpu_addr_}, mipmap_sizes(params_.num_levels), - mipmap_offsets(params.num_levels) { - is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported_; - host_memory_size = params.GetHostSizeInBytes(is_converted); - - std::size_t offset = 0; - for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; - mipmap_sizes[level] = mipmap_size; - mipmap_offsets[level] = offset; - offset += mipmap_size; - } - layer_size = offset; - if (params.is_layered) { - if (params.is_tiled) { - layer_size = - SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth); - } - guest_memory_size = layer_size * params.depth; - } else { - guest_memory_size = layer_size; - } -} - -MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const { - const u32 src_bpp{params.GetBytesPerPixel()}; - const u32 dst_bpp{rhs.GetBytesPerPixel()}; - const bool ib1 = params.IsBuffer(); - const bool ib2 = rhs.IsBuffer(); - if (std::tie(src_bpp, params.is_tiled, ib1) == 
std::tie(dst_bpp, rhs.is_tiled, ib2)) { - const bool cb1 = params.IsCompressed(); - const bool cb2 = rhs.IsCompressed(); - if (cb1 == cb2) { - return MatchTopologyResult::FullMatch; - } - return MatchTopologyResult::CompressUnmatch; - } - return MatchTopologyResult::None; -} - -MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const { - // Buffer surface Check - if (params.IsBuffer()) { - const std::size_t wd1 = params.width * params.GetBytesPerPixel(); - const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); - if (wd1 == wd2) { - return MatchStructureResult::FullMatch; - } - return MatchStructureResult::None; - } - - // Linear Surface check - if (!params.is_tiled) { - if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) { - if (params.width == rhs.width) { - return MatchStructureResult::FullMatch; - } else { - return MatchStructureResult::SemiMatch; - } - } - return MatchStructureResult::None; - } - - // Tiled Surface check - if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, - params.tile_width_spacing, params.num_levels) == - std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing, rhs.num_levels)) { - if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { - return MatchStructureResult::FullMatch; - } - const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format, - rhs.pixel_format); - const u32 hs = - SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); - const u32 w1 = params.GetBlockAlignedWidth(); - if (std::tie(w1, params.height) == std::tie(ws, hs)) { - return MatchStructureResult::SemiMatch; - } - } - return MatchStructureResult::None; -} - -std::optional> SurfaceBaseImpl::GetLayerMipmap( - const GPUVAddr candidate_gpu_addr) const { - if (gpu_addr == candidate_gpu_addr) { - return {{0, 0}}; - } - - if (candidate_gpu_addr < gpu_addr) { - 
return std::nullopt; - } - - const auto relative_address{static_cast(candidate_gpu_addr - gpu_addr)}; - const auto layer{static_cast(relative_address / layer_size)}; - if (layer >= params.depth) { - return std::nullopt; - } - - const GPUVAddr mipmap_address = relative_address - layer_size * layer; - const auto mipmap_it = - Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); - if (mipmap_it == mipmap_offsets.end()) { - return std::nullopt; - } - - const auto level{static_cast(std::distance(mipmap_offsets.begin(), mipmap_it))}; - return std::make_pair(layer, level); -} - -std::vector SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const { - const u32 layers{params.depth}; - const u32 mipmaps{params.num_levels}; - std::vector result; - result.reserve(static_cast(layers) * static_cast(mipmaps)); - - for (u32 layer = 0; layer < layers; layer++) { - for (u32 level = 0; level < mipmaps; level++) { - const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); - const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1); - } - } - return result; -} - -std::vector SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const { - const u32 mipmaps{params.num_levels}; - std::vector result; - result.reserve(mipmaps); - - for (u32 level = 0; level < mipmaps; level++) { - const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); - const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; - result.emplace_back(width, height, depth, level); - } - return result; -} - -void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, - const SurfaceParams& surface_params, u8* buffer, u32 level) { - const u32 width{surface_params.GetMipWidth(level)}; - const u32 
height{surface_params.GetMipHeight(level)}; - const u32 block_height{surface_params.GetMipBlockHeight(level)}; - const u32 block_depth{surface_params.GetMipBlockDepth(level)}; - - std::size_t guest_offset{mipmap_offsets[level]}; - if (surface_params.is_layered) { - std::size_t host_offset = 0; - const std::size_t guest_stride = layer_size; - const std::size_t host_stride = surface_params.GetHostLayerSize(level); - for (u32 layer = 0; layer < surface_params.depth; ++layer) { - MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, - block_depth, 1, surface_params.tile_width_spacing, buffer + host_offset, - memory + guest_offset); - guest_offset += guest_stride; - host_offset += host_stride; - } - } else { - MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, block_depth, - surface_params.GetMipDepth(level), surface_params.tile_width_spacing, buffer, - memory + guest_offset); - } -} - -void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, - StagingCache& staging_cache) { - MICROPROFILE_SCOPE(GPU_Load_Texture); - auto& staging_buffer = staging_cache.GetBuffer(0); - u8* host_ptr; - // Use an extra temporal buffer - auto& tmp_buffer = staging_cache.GetBuffer(1); - tmp_buffer.resize(guest_memory_size); - host_ptr = tmp_buffer.data(); - memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); - - if (params.is_tiled) { - ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", - params.block_width, static_cast(params.target)); - for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; - SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, - staging_buffer.data() + host_offset, level); - } - } else { - ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented"); - const u32 bpp{params.GetBytesPerPixel()}; - const u32 
block_width{params.GetDefaultBlockWidth()}; - const u32 block_height{params.GetDefaultBlockHeight()}; - const u32 width{(params.width + block_width - 1) / block_width}; - const u32 height{(params.height + block_height - 1) / block_height}; - const u32 copy_size{width * bpp}; - if (params.pitch == copy_size) { - std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false)); - } else { - const u8* start{host_ptr}; - u8* write_to{staging_buffer.data()}; - for (u32 h = height; h > 0; --h) { - std::memcpy(write_to, start, copy_size); - start += params.pitch; - write_to += copy_size; - } - } - } - - if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) { - return; - } - - for (u32 level = params.num_levels; level--;) { - const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)}; - const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)}; - u8* const in_buffer = staging_buffer.data() + in_host_offset; - u8* const out_buffer = staging_buffer.data() + out_host_offset; - ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, - params.GetMipWidth(level), params.GetMipHeight(level), - params.GetMipDepth(level), true, true); - } -} - -void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, - StagingCache& staging_cache) { - MICROPROFILE_SCOPE(GPU_Flush_Texture); - auto& staging_buffer = staging_cache.GetBuffer(0); - u8* host_ptr; - - // Use an extra temporal buffer - auto& tmp_buffer = staging_cache.GetBuffer(1); - tmp_buffer.resize(guest_memory_size); - host_ptr = tmp_buffer.data(); - - if (params.target == SurfaceTarget::Texture3D) { - // Special case for 3D texture segments - memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); - } - - if (params.is_tiled) { - ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); - for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t 
host_offset{params.GetHostMipmapLevelOffset(level, false)}; - SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, - staging_buffer.data() + host_offset, level); - } - } else if (params.IsBuffer()) { - // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest - // memory. - std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); - } else { - ASSERT(params.target == SurfaceTarget::Texture2D); - ASSERT(params.num_levels == 1); - - const u32 bpp{params.GetBytesPerPixel()}; - const u32 copy_size{params.width * bpp}; - if (params.pitch == copy_size) { - std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); - } else { - u8* start{host_ptr}; - const u8* read_to{staging_buffer.data()}; - for (u32 h = params.height; h > 0; --h) { - std::memcpy(start, read_to, copy_size); - start += params.pitch; - read_to += copy_size; - } - } - } - memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); -} - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h deleted file mode 100644 index b57135fe4..000000000 --- a/src/video_core/texture_cache/surface_base.h +++ /dev/null @@ -1,333 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/gpu.h" -#include "video_core/morton.h" -#include "video_core/texture_cache/copy_params.h" -#include "video_core/texture_cache/surface_params.h" -#include "video_core/texture_cache/surface_view.h" - -namespace Tegra { -class MemoryManager; -} - -namespace VideoCommon { - -using VideoCore::MortonSwizzleMode; -using VideoCore::Surface::SurfaceTarget; - -enum class MatchStructureResult : u32 { - FullMatch = 0, - SemiMatch = 1, - None = 2, -}; - -enum class MatchTopologyResult : u32 { - FullMatch = 0, - CompressUnmatch = 1, - None = 2, -}; - -class StagingCache { -public: - explicit StagingCache(); - ~StagingCache(); - - std::vector& GetBuffer(std::size_t index) { - return staging_buffer[index]; - } - - const std::vector& GetBuffer(std::size_t index) const { - return staging_buffer[index]; - } - - void SetSize(std::size_t size) { - staging_buffer.resize(size); - } - -private: - std::vector> staging_buffer; -}; - -class SurfaceBaseImpl { -public: - void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); - - void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); - - GPUVAddr GetGpuAddr() const { - return gpu_addr; - } - - bool Overlaps(const VAddr start, const VAddr end) const { - return (cpu_addr < end) && (cpu_addr_end > start); - } - - bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const { - const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; - return gpu_addr <= other_start && other_end <= gpu_addr_end; - } - - // Use only when recycling a surface - void SetGpuAddr(const GPUVAddr new_addr) { - gpu_addr = new_addr; - } - - VAddr GetCpuAddr() const { - return cpu_addr; - } - - VAddr GetCpuAddrEnd() const { - return cpu_addr_end; - } - - void SetCpuAddr(const VAddr new_addr) { - cpu_addr = new_addr; - cpu_addr_end = new_addr + guest_memory_size; - } - - const 
SurfaceParams& GetSurfaceParams() const { - return params; - } - - std::size_t GetSizeInBytes() const { - return guest_memory_size; - } - - std::size_t GetHostSizeInBytes() const { - return host_memory_size; - } - - std::size_t GetMipmapSize(const u32 level) const { - return mipmap_sizes[level]; - } - - bool IsLinear() const { - return !params.is_tiled; - } - - bool IsConverted() const { - return is_converted; - } - - bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { - return params.pixel_format == pixel_format; - } - - VideoCore::Surface::PixelFormat GetFormat() const { - return params.pixel_format; - } - - bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { - return params.target == target; - } - - MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const; - - MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const; - - bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { - return std::tie(gpu_addr, params.target, params.num_levels) == - std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && - params.target == SurfaceTarget::Texture2D && params.num_levels == 1; - } - - std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const; - - std::vector BreakDown(const SurfaceParams& in_params) const { - return params.is_layered ? 
BreakDownLayered(in_params) : BreakDownNonLayered(in_params); - } - -protected: - explicit SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_, - bool is_astc_supported_); - ~SurfaceBaseImpl() = default; - - virtual void DecorateSurfaceName() = 0; - - const SurfaceParams params; - std::size_t layer_size; - std::size_t guest_memory_size; - std::size_t host_memory_size; - GPUVAddr gpu_addr{}; - VAddr cpu_addr{}; - VAddr cpu_addr_end{}; - bool is_converted{}; - - std::vector mipmap_sizes; - std::vector mipmap_offsets; - -private: - void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& surface_params, - u8* buffer, u32 level); - - std::vector BreakDownLayered(const SurfaceParams& in_params) const; - - std::vector BreakDownNonLayered(const SurfaceParams& in_params) const; -}; - -template -class SurfaceBase : public SurfaceBaseImpl { -public: - virtual void UploadTexture(const std::vector& staging_buffer) = 0; - - virtual void DownloadTexture(std::vector& staging_buffer) = 0; - - void MarkAsModified(bool is_modified_, u64 tick) { - is_modified = is_modified_ || is_target; - modification_tick = tick; - } - - void MarkAsRenderTarget(bool is_target_, u32 index_) { - is_target = is_target_; - index = index_; - } - - void SetMemoryMarked(bool is_memory_marked_) { - is_memory_marked = is_memory_marked_; - } - - bool IsMemoryMarked() const { - return is_memory_marked; - } - - void SetSyncPending(bool is_sync_pending_) { - is_sync_pending = is_sync_pending_; - } - - bool IsSyncPending() const { - return is_sync_pending; - } - - void MarkAsPicked(bool is_picked_) { - is_picked = is_picked_; - } - - bool IsModified() const { - return is_modified; - } - - bool IsProtected() const { - // Only 3D slices are to be protected - return is_target && params.target == SurfaceTarget::Texture3D; - } - - bool IsRenderTarget() const { - return is_target; - } - - u32 GetRenderTarget() const { - return index; - } - - bool IsRegistered() const { - return 
is_registered; - } - - bool IsPicked() const { - return is_picked; - } - - void MarkAsRegistered(bool is_reg) { - is_registered = is_reg; - } - - u64 GetModificationTick() const { - return modification_tick; - } - - TView EmplaceOverview(const SurfaceParams& overview_params) { - const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth}; - return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); - } - - TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) { - return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth, - base_level, num_levels)); - } - - std::optional EmplaceIrregularView(const SurfaceParams& view_params, - const GPUVAddr view_addr, - const std::size_t candidate_size, const u32 mipmap, - const u32 layer) { - const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)}; - if (!layer_mipmap) { - return {}; - } - const auto [end_layer, end_mipmap] = *layer_mipmap; - if (layer != end_layer) { - if (mipmap == 0 && end_mipmap == 0) { - return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1)); - } - return {}; - } else { - return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap)); - } - } - - std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, - const std::size_t candidate_size) { - if (params.target == SurfaceTarget::Texture3D || - view_params.target == SurfaceTarget::Texture3D || - (params.num_levels == 1 && !params.is_layered)) { - return {}; - } - const auto layer_mipmap{GetLayerMipmap(view_addr)}; - if (!layer_mipmap) { - return {}; - } - const auto [layer, mipmap] = *layer_mipmap; - if (GetMipmapSize(mipmap) != candidate_size) { - return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer); - } - return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1)); - } - - TView GetMainView() const { - return main_view; - } - 
-protected: - explicit SurfaceBase(const GPUVAddr gpu_addr_, const SurfaceParams& params_, - bool is_astc_supported_) - : SurfaceBaseImpl{gpu_addr_, params_, is_astc_supported_} {} - - ~SurfaceBase() = default; - - virtual TView CreateView(const ViewParams& view_key) = 0; - - TView main_view; - std::unordered_map views; - -private: - TView GetView(const ViewParams& key) { - const auto [entry, is_cache_miss] = views.try_emplace(key); - auto& view{entry->second}; - if (is_cache_miss) { - view = CreateView(key); - } - return view; - } - - static constexpr u32 NO_RT = 0xFFFFFFFF; - - bool is_modified{}; - bool is_target{}; - bool is_registered{}; - bool is_picked{}; - bool is_memory_marked{}; - bool is_sync_pending{}; - u32 index{NO_RT}; - u64 modification_tick{}; -}; - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp deleted file mode 100644 index 96f93246d..000000000 --- a/src/video_core/texture_cache/surface_params.cpp +++ /dev/null @@ -1,445 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include "common/alignment.h" -#include "common/bit_util.h" -#include "core/core.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/surface.h" -#include "video_core/texture_cache/format_lookup_table.h" -#include "video_core/texture_cache/surface_params.h" - -namespace VideoCommon { - -using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::PixelFormatFromDepthFormat; -using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -using VideoCore::Surface::SurfaceTarget; -using VideoCore::Surface::SurfaceTargetFromTextureType; -using VideoCore::Surface::SurfaceType; - -namespace { - -SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { - switch (type) { - case Tegra::Shader::TextureType::Texture1D: - return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D; - case Tegra::Shader::TextureType::Texture2D: - return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; - case Tegra::Shader::TextureType::Texture3D: - ASSERT(!is_array); - return SurfaceTarget::Texture3D; - case Tegra::Shader::TextureType::TextureCube: - return is_array ? 
SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap; - default: - UNREACHABLE(); - return SurfaceTarget::Texture2D; - } -} - -SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) { - switch (type) { - case Tegra::Shader::ImageType::Texture1D: - return SurfaceTarget::Texture1D; - case Tegra::Shader::ImageType::TextureBuffer: - return SurfaceTarget::TextureBuffer; - case Tegra::Shader::ImageType::Texture1DArray: - return SurfaceTarget::Texture1DArray; - case Tegra::Shader::ImageType::Texture2D: - return SurfaceTarget::Texture2D; - case Tegra::Shader::ImageType::Texture2DArray: - return SurfaceTarget::Texture2DArray; - case Tegra::Shader::ImageType::Texture3D: - return SurfaceTarget::Texture3D; - default: - UNREACHABLE(); - return SurfaceTarget::Texture2D; - } -} - -constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { - return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); -} - -} // Anonymous namespace - -SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table, - const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Sampler& entry) { - SurfaceParams params; - params.is_tiled = tic.IsTiled(); - params.srgb_conversion = tic.IsSrgbConversionEnabled(); - params.block_width = params.is_tiled ? tic.BlockWidth() : 0; - params.block_height = params.is_tiled ? tic.BlockHeight() : 0; - params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; - params.tile_width_spacing = params.is_tiled ? 
(1 << tic.tile_width_spacing.Value()) : 1; - params.pixel_format = lookup_table.GetPixelFormat( - tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); - params.type = GetFormatType(params.pixel_format); - if (entry.is_shadow && params.type == SurfaceType::ColorTexture) { - switch (params.pixel_format) { - case PixelFormat::R16_UNORM: - case PixelFormat::R16_FLOAT: - params.pixel_format = PixelFormat::D16_UNORM; - break; - case PixelFormat::R32_FLOAT: - params.pixel_format = PixelFormat::D32_FLOAT; - break; - default: - UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", - static_cast(params.pixel_format)); - } - params.type = GetFormatType(params.pixel_format); - } - // TODO: on 1DBuffer we should use the tic info. - if (tic.IsBuffer()) { - params.target = SurfaceTarget::TextureBuffer; - params.width = tic.Width(); - params.pitch = params.width * params.GetBytesPerPixel(); - params.height = 1; - params.depth = 1; - params.num_levels = 1; - params.emulated_levels = 1; - params.is_layered = false; - } else { - params.target = TextureTypeToSurfaceTarget(entry.type, entry.is_array); - params.width = tic.Width(); - params.height = tic.Height(); - params.depth = tic.Depth(); - params.pitch = params.is_tiled ? 0 : tic.Pitch(); - if (params.target == SurfaceTarget::TextureCubemap || - params.target == SurfaceTarget::TextureCubeArray) { - params.depth *= 6; - } - params.num_levels = tic.max_mip_level + 1; - params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); - params.is_layered = params.IsLayered(); - } - return params; -} - -SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table, - const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Image& entry) { - SurfaceParams params; - params.is_tiled = tic.IsTiled(); - params.srgb_conversion = tic.IsSrgbConversionEnabled(); - params.block_width = params.is_tiled ? 
tic.BlockWidth() : 0; - params.block_height = params.is_tiled ? tic.BlockHeight() : 0; - params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; - params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; - params.pixel_format = lookup_table.GetPixelFormat( - tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); - params.type = GetFormatType(params.pixel_format); - params.target = ImageTypeToSurfaceTarget(entry.type); - // TODO: on 1DBuffer we should use the tic info. - if (tic.IsBuffer()) { - params.target = SurfaceTarget::TextureBuffer; - params.width = tic.Width(); - params.pitch = params.width * params.GetBytesPerPixel(); - params.height = 1; - params.depth = 1; - params.num_levels = 1; - params.emulated_levels = 1; - params.is_layered = false; - } else { - params.width = tic.Width(); - params.height = tic.Height(); - params.depth = tic.Depth(); - params.pitch = params.is_tiled ? 0 : tic.Pitch(); - if (params.target == SurfaceTarget::TextureCubemap || - params.target == SurfaceTarget::TextureCubeArray) { - params.depth *= 6; - } - params.num_levels = tic.max_mip_level + 1; - params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); - params.is_layered = params.IsLayered(); - } - return params; -} - -SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) { - const auto& regs = maxwell3d.regs; - const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); - const bool is_layered = regs.zeta_layers > 1 && block_depth == 0; - const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); - return { - .is_tiled = regs.zeta.memory_layout.type == - Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear, - .srgb_conversion = false, - .is_layered = is_layered, - .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U), - .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 
5U), - .block_depth = block_depth, - .tile_width_spacing = 1, - .width = regs.zeta_width, - .height = regs.zeta_height, - .depth = is_layered ? regs.zeta_layers.Value() : 1U, - .pitch = 0, - .num_levels = 1, - .emulated_levels = 1, - .pixel_format = pixel_format, - .type = GetFormatType(pixel_format), - .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D, - }; -} - -SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, - std::size_t index) { - const auto& config{maxwell3d.regs.rt[index]}; - SurfaceParams params; - params.is_tiled = - config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; - params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || - config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB; - params.block_width = config.memory_layout.block_width; - params.block_height = config.memory_layout.block_height; - params.block_depth = config.memory_layout.block_depth; - params.tile_width_spacing = 1; - params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); - params.type = GetFormatType(params.pixel_format); - if (params.is_tiled) { - params.pitch = 0; - params.width = config.width; - } else { - const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; - params.pitch = config.width; - params.width = params.pitch / bpp; - } - params.height = config.height; - params.num_levels = 1; - params.emulated_levels = 1; - - if (config.memory_layout.is_3d != 0) { - params.depth = config.layers.Value(); - params.is_layered = false; - params.target = SurfaceTarget::Texture3D; - } else if (config.layers > 1) { - params.depth = config.layers.Value(); - params.is_layered = true; - params.target = SurfaceTarget::Texture2DArray; - } else { - params.depth = 1; - params.is_layered = false; - params.target = SurfaceTarget::Texture2D; - } - return params; -} - -SurfaceParams SurfaceParams::CreateForFermiCopySurface( - const 
Tegra::Engines::Fermi2D::Regs::Surface& config) { - const bool is_tiled = !config.linear; - const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format); - - SurfaceParams params{ - .is_tiled = is_tiled, - .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || - config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB, - .is_layered = false, - .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U, - .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U, - .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U, - .tile_width_spacing = 1, - .width = config.width, - .height = config.height, - .depth = 1, - .pitch = config.pitch, - .num_levels = 1, - .emulated_levels = 1, - .pixel_format = pixel_format, - .type = GetFormatType(pixel_format), - // TODO(Rodrigo): Try to guess texture arrays from parameters - .target = SurfaceTarget::Texture2D, - }; - - params.is_layered = params.IsLayered(); - return params; -} - -VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget( - const VideoCommon::Shader::Sampler& entry) { - return TextureTypeToSurfaceTarget(entry.type, entry.is_array); -} - -VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget( - const VideoCommon::Shader::Image& entry) { - return ImageTypeToSurfaceTarget(entry.type); -} - -bool SurfaceParams::IsLayered() const { - switch (target) { - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: - return true; - default: - return false; - } -} - -// Auto block resizing algorithm from: -// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c -u32 SurfaceParams::GetMipBlockHeight(u32 level) const { - if (level == 0) { - return this->block_height; - } - - const u32 height_new{GetMipHeight(level)}; - const u32 default_block_height{GetDefaultBlockHeight()}; - const u32 blocks_in_y{(height_new + 
default_block_height - 1) / default_block_height}; - const u32 block_height_new = Common::Log2Ceil32(blocks_in_y); - return std::clamp(block_height_new, 3U, 7U) - 3U; -} - -u32 SurfaceParams::GetMipBlockDepth(u32 level) const { - if (level == 0) { - return this->block_depth; - } - if (is_layered) { - return 0; - } - - const u32 depth_new{GetMipDepth(level)}; - const u32 block_depth_new = Common::Log2Ceil32(depth_new); - if (block_depth_new > 4) { - return 5 - (GetMipBlockHeight(level) >= 2); - } - return block_depth_new; -} - -std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { - std::size_t offset = 0; - for (u32 i = 0; i < level; i++) { - offset += GetInnerMipmapMemorySize(i, false, false); - } - return offset; -} - -std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const { - std::size_t offset = 0; - if (is_converted) { - for (u32 i = 0; i < level; ++i) { - offset += GetConvertedMipmapSize(i) * GetNumLayers(); - } - } else { - for (u32 i = 0; i < level; ++i) { - offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); - } - } - return offset; -} - -std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { - constexpr std::size_t rgba8_bpp = 4ULL; - const std::size_t mip_width = GetMipWidth(level); - const std::size_t mip_height = GetMipHeight(level); - const std::size_t mip_depth = is_layered ? 
1 : GetMipDepth(level); - return mip_width * mip_height * mip_depth * rgba8_bpp; -} - -std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { - std::size_t size = 0; - for (u32 level = 0; level < num_levels; ++level) { - size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); - } - if (is_tiled && is_layered) { - return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); - } - return size; -} - -std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, - bool uncompressed) const { - const u32 mip_width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; - const u32 mip_height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; - const u32 mip_depth{is_layered ? 1U : GetMipDepth(level)}; - if (is_tiled) { - return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), mip_width, - mip_height, mip_depth, GetMipBlockHeight(level), - GetMipBlockDepth(level)); - } else if (as_host_size || IsBuffer()) { - return GetBytesPerPixel() * mip_width * mip_height * mip_depth; - } else { - // Linear Texture Case - return pitch * mip_height * mip_depth; - } -} - -bool SurfaceParams::operator==(const SurfaceParams& rhs) const { - return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, - height, depth, pitch, num_levels, pixel_format, type, target) == - std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, - rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target); -} - -std::string SurfaceParams::TargetName() const { - switch (target) { - case SurfaceTarget::Texture1D: - return "1D"; - case SurfaceTarget::TextureBuffer: - return "TexBuffer"; - case SurfaceTarget::Texture2D: - return "2D"; - case SurfaceTarget::Texture3D: - return "3D"; - case SurfaceTarget::Texture1DArray: - return "1DArray"; - case 
SurfaceTarget::Texture2DArray: - return "2DArray"; - case SurfaceTarget::TextureCubemap: - return "Cube"; - case SurfaceTarget::TextureCubeArray: - return "CubeArray"; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target); - UNREACHABLE(); - return fmt::format("TUK({})", target); - } -} - -u32 SurfaceParams::GetBlockSize() const { - const u32 x = 64U << block_width; - const u32 y = 8U << block_height; - const u32 z = 1U << block_depth; - return x * y * z; -} - -std::pair SurfaceParams::GetBlockXY() const { - const u32 x_pixels = 64U / GetBytesPerPixel(); - const u32 x = x_pixels << block_width; - const u32 y = 8U << block_height; - return {x, y}; -} - -std::tuple SurfaceParams::GetBlockOffsetXYZ(u32 offset) const { - const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; - const u32 block_size = GetBlockSize(); - const u32 block_index = offset / block_size; - const u32 gob_offset = offset % block_size; - const u32 gob_index = gob_offset / static_cast(Tegra::Texture::GOB_SIZE); - const u32 x_gob_pixels = 64U / GetBytesPerPixel(); - const u32 x_block_pixels = x_gob_pixels << block_width; - const u32 y_block_pixels = 8U << block_height; - const u32 z_block_pixels = 1U << block_depth; - const u32 x_blocks = div_ceil(width, x_block_pixels); - const u32 y_blocks = div_ceil(height, y_block_pixels); - const u32 z_blocks = div_ceil(depth, z_block_pixels); - const u32 base_x = block_index % x_blocks; - const u32 base_y = (block_index / x_blocks) % y_blocks; - const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks; - u32 x = base_x * x_block_pixels; - u32 y = base_y * y_block_pixels; - u32 z = base_z * z_block_pixels; - z += gob_index >> block_height; - y += (gob_index * 8U) % y_block_pixels; - return {x, y, z}; -} - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h deleted file mode 100644 index 4466c3c34..000000000 --- 
a/src/video_core/texture_cache/surface_params.h +++ /dev/null @@ -1,294 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "common/alignment.h" -#include "common/bit_util.h" -#include "common/cityhash.h" -#include "common/common_types.h" -#include "video_core/engines/fermi_2d.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/surface.h" -#include "video_core/textures/decoders.h" - -namespace VideoCommon { - -class FormatLookupTable; - -class SurfaceParams { -public: - /// Creates SurfaceCachedParams from a texture configuration. - static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table, - const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Sampler& entry); - - /// Creates SurfaceCachedParams from an image configuration. - static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table, - const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Image& entry); - - /// Creates SurfaceCachedParams for a depth buffer configuration. - static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d); - - /// Creates SurfaceCachedParams from a framebuffer configuration. - static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, - std::size_t index); - - /// Creates SurfaceCachedParams from a Fermi2D surface configuration. - static SurfaceParams CreateForFermiCopySurface( - const Tegra::Engines::Fermi2D::Regs::Surface& config); - - /// Obtains the texture target from a shader's sampler entry. - static VideoCore::Surface::SurfaceTarget ExpectedTarget( - const VideoCommon::Shader::Sampler& entry); - - /// Obtains the texture target from a shader's sampler entry. 
- static VideoCore::Surface::SurfaceTarget ExpectedTarget( - const VideoCommon::Shader::Image& entry); - - std::size_t Hash() const { - return static_cast( - Common::CityHash64(reinterpret_cast(this), sizeof(*this))); - } - - bool operator==(const SurfaceParams& rhs) const; - - bool operator!=(const SurfaceParams& rhs) const { - return !operator==(rhs); - } - - std::size_t GetGuestSizeInBytes() const { - return GetInnerMemorySize(false, false, false); - } - - std::size_t GetHostSizeInBytes(bool is_converted) const { - if (!is_converted) { - return GetInnerMemorySize(true, false, false); - } - // ASTC is uncompressed in software, in emulated as RGBA8 - std::size_t host_size_in_bytes = 0; - for (u32 level = 0; level < num_levels; ++level) { - host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers(); - } - return host_size_in_bytes; - } - - u32 GetBlockAlignedWidth() const { - return Common::AlignUp(width, 64 / GetBytesPerPixel()); - } - - /// Returns the width of a given mipmap level. - u32 GetMipWidth(u32 level) const { - return std::max(1U, width >> level); - } - - /// Returns the height of a given mipmap level. - u32 GetMipHeight(u32 level) const { - return std::max(1U, height >> level); - } - - /// Returns the depth of a given mipmap level. - u32 GetMipDepth(u32 level) const { - return is_layered ? depth : std::max(1U, depth >> level); - } - - /// Returns the block height of a given mipmap level. - u32 GetMipBlockHeight(u32 level) const; - - /// Returns the block depth of a given mipmap level. - u32 GetMipBlockDepth(u32 level) const; - - /// Returns the best possible row/pitch alignment for the surface. - u32 GetRowAlignment(u32 level, bool is_converted) const { - const u32 bpp = is_converted ? 4 : GetBytesPerPixel(); - return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); - } - - /// Returns the offset in bytes in guest memory of a given mipmap level. 
- std::size_t GetGuestMipmapLevelOffset(u32 level) const; - - /// Returns the offset in bytes in host memory (linear) of a given mipmap level. - std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const; - - /// Returns the size in bytes in guest memory of a given mipmap level. - std::size_t GetGuestMipmapSize(u32 level) const { - return GetInnerMipmapMemorySize(level, false, false); - } - - /// Returns the size in bytes in host memory (linear) of a given mipmap level. - std::size_t GetHostMipmapSize(u32 level) const { - return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); - } - - std::size_t GetConvertedMipmapSize(u32 level) const; - - /// Get this texture Tegra Block size in guest memory layout - u32 GetBlockSize() const; - - /// Get X, Y coordinates max sizes of a single block. - std::pair GetBlockXY() const; - - /// Get the offset in x, y, z coordinates from a memory offset - std::tuple GetBlockOffsetXYZ(u32 offset) const; - - /// Returns the size of a layer in bytes in guest memory. - std::size_t GetGuestLayerSize() const { - return GetLayerSize(false, false); - } - - /// Returns the size of a layer in bytes in host memory for a given mipmap level. - std::size_t GetHostLayerSize(u32 level) const { - ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D); - return GetInnerMipmapMemorySize(level, true, false); - } - - /// Returns the max possible mipmap that the texture can have in host gpu - u32 MaxPossibleMipmap() const { - const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; - const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; - const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h); - if (target != VideoCore::Surface::SurfaceTarget::Texture3D) - return max_mipmap; - return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); - } - - /// Returns if the guest surface is a compressed surface. 
- bool IsCompressed() const { - return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; - } - - /// Returns the default block width. - u32 GetDefaultBlockWidth() const { - return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); - } - - /// Returns the default block height. - u32 GetDefaultBlockHeight() const { - return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); - } - - /// Returns the bits per pixel. - u32 GetBitsPerPixel() const { - return VideoCore::Surface::GetFormatBpp(pixel_format); - } - - /// Returns the bytes per pixel. - u32 GetBytesPerPixel() const { - return VideoCore::Surface::GetBytesPerPixel(pixel_format); - } - - /// Returns true if the pixel format is a depth and/or stencil format. - bool IsPixelFormatZeta() const { - return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && - pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; - } - - /// Returns is the surface is a TextureBuffer type of surface. - bool IsBuffer() const { - return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; - } - - /// Returns the number of layers in the surface. - std::size_t GetNumLayers() const { - return is_layered ? depth : 1; - } - - /// Returns the debug name of the texture for use in graphic debuggers. - std::string TargetName() const; - - // Helper used for out of class size calculations - static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, - const u32 block_depth) { - return Common::AlignBits(out_size, - Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); - } - - /// Converts a width from a type of surface into another. This helps represent the - /// equivalent value between compressed/non-compressed textures. 
- static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, - VideoCore::Surface::PixelFormat pixel_format_to) { - const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); - const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); - return (width * bw2 + bw1 - 1) / bw1; - } - - /// Converts a height from a type of surface into another. This helps represent the - /// equivalent value between compressed/non-compressed textures. - static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, - VideoCore::Surface::PixelFormat pixel_format_to) { - const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); - const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); - return (height * bh2 + bh1 - 1) / bh1; - } - - // Finds the maximun possible width between 2 2D layers of different formats - static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, - const u32 src_level, const u32 dst_level) { - const u32 bw1 = src_params.GetDefaultBlockWidth(); - const u32 bw2 = dst_params.GetDefaultBlockWidth(); - const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; - const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; - return std::min(t_src_width, t_dst_width); - } - - // Finds the maximun possible height between 2 2D layers of different formats - static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, - const u32 src_level, const u32 dst_level) { - const u32 bh1 = src_params.GetDefaultBlockHeight(); - const u32 bh2 = dst_params.GetDefaultBlockHeight(); - const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; - const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; - return std::min(t_src_height, t_dst_height); - } - - bool is_tiled; - bool srgb_conversion; - bool 
is_layered; - u32 block_width; - u32 block_height; - u32 block_depth; - u32 tile_width_spacing; - u32 width; - u32 height; - u32 depth; - u32 pitch; - u32 num_levels; - u32 emulated_levels; - VideoCore::Surface::PixelFormat pixel_format; - VideoCore::Surface::SurfaceType type; - VideoCore::Surface::SurfaceTarget target; - -private: - /// Returns the size of a given mipmap level inside a layer. - std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; - - /// Returns the size of all mipmap levels and aligns as needed. - std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { - return GetLayerSize(as_host_size, uncompressed) * - (layer_only ? 1U : (is_layered ? depth : 1U)); - } - - /// Returns the size of a layer - std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; - - /// Returns true if these parameters are from a layered surface. - bool IsLayered() const; -}; - -} // namespace VideoCommon - -namespace std { - -template <> -struct hash { - std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp deleted file mode 100644 index 6b5f5984b..000000000 --- a/src/video_core/texture_cache/surface_view.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include - -#include "common/common_types.h" -#include "video_core/texture_cache/surface_view.h" - -namespace VideoCommon { - -std::size_t ViewParams::Hash() const { - return static_cast(base_layer) ^ (static_cast(num_layers) << 16) ^ - (static_cast(base_level) << 24) ^ - (static_cast(num_levels) << 32) ^ (static_cast(target) << 36); -} - -bool ViewParams::operator==(const ViewParams& rhs) const { - return std::tie(base_layer, num_layers, base_level, num_levels, target) == - std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); -} - -bool ViewParams::operator!=(const ViewParams& rhs) const { - return !operator==(rhs); -} - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h deleted file mode 100644 index 199f72732..000000000 --- a/src/video_core/texture_cache/surface_view.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include - -#include "common/common_types.h" -#include "video_core/surface.h" -#include "video_core/texture_cache/surface_params.h" - -namespace VideoCommon { - -struct ViewParams { - constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target_, u32 base_layer_, - u32 num_layers_, u32 base_level_, u32 num_levels_) - : target{target_}, base_layer{base_layer_}, num_layers{num_layers_}, - base_level{base_level_}, num_levels{num_levels_} {} - - std::size_t Hash() const; - - bool operator==(const ViewParams& rhs) const; - bool operator!=(const ViewParams& rhs) const; - - bool IsLayered() const { - switch (target) { - case VideoCore::Surface::SurfaceTarget::Texture1DArray: - case VideoCore::Surface::SurfaceTarget::Texture2DArray: - case VideoCore::Surface::SurfaceTarget::TextureCubemap: - case VideoCore::Surface::SurfaceTarget::TextureCubeArray: - return true; - default: - return false; - } - } - - VideoCore::Surface::SurfaceTarget target{}; - u32 base_layer{}; - u32 num_layers{}; - u32 base_level{}; - u32 num_levels{}; -}; - -class ViewBase { -public: - constexpr explicit ViewBase(const ViewParams& view_params) : params{view_params} {} - - constexpr const ViewParams& GetViewParams() const { - return params; - } - -protected: - ViewParams params; -}; - -} // namespace VideoCommon - -namespace std { - -template <> -struct hash { - std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 581d8dd5b..968059842 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -6,1298 +6,1449 @@ #include #include -#include +#include #include #include -#include -#include +#include +#include +#include #include +#include #include #include -#include -#include -#include "common/assert.h" +#include "common/alignment.h" +#include 
"common/common_funcs.h" #include "common/common_types.h" -#include "common/math_util.h" -#include "core/core.h" -#include "core/memory.h" -#include "core/settings.h" +#include "common/logging/log.h" #include "video_core/compatible_formats.h" +#include "video_core/delayed_destruction_ring.h" #include "video_core/dirty_flags.h" #include "video_core/engines/fermi_2d.h" +#include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" -#include "video_core/texture_cache/copy_params.h" +#include "video_core/texture_cache/descriptor_table.h" #include "video_core/texture_cache/format_lookup_table.h" -#include "video_core/texture_cache/surface_base.h" -#include "video_core/texture_cache/surface_params.h" -#include "video_core/texture_cache/surface_view.h" - -namespace Tegra::Texture { -struct FullTextureInfo; -} - -namespace VideoCore { -class RasterizerInterface; -} +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/render_targets.h" +#include "video_core/texture_cache/samples_helper.h" +#include "video_core/texture_cache/slot_vector.h" +#include "video_core/texture_cache/types.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/texture.h" namespace VideoCommon { -using VideoCore::Surface::FormatCompatibility; +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +using VideoCore::Surface::GetFormatType; +using VideoCore::Surface::IsCopyCompatible; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget; -using RenderTargetConfig = 
Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::SurfaceType; -template +template class TextureCache { - using VectorSurface = boost::container::small_vector; + /// Address shift for caching images into a hash table + static constexpr u64 PAGE_SHIFT = 20; + + /// Enables debugging features to the texture cache + static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; + /// Implement blits as copies between framebuffers + static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; + /// True when some copies have to be emulated + static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; + + /// Image view ID for null descriptors + static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; + /// Sampler ID for bugged sampler ids + static constexpr SamplerId NULL_SAMPLER_ID{0}; + + using Runtime = typename P::Runtime; + using Image = typename P::Image; + using ImageAlloc = typename P::ImageAlloc; + using ImageView = typename P::ImageView; + using Sampler = typename P::Sampler; + using Framebuffer = typename P::Framebuffer; + + struct BlitImages { + ImageId dst_id; + ImageId src_id; + PixelFormat dst_format; + PixelFormat src_format; + }; + + template + struct IdentityHash { + [[nodiscard]] size_t operator()(T value) const noexcept { + return static_cast(value); + } + }; public: - void InvalidateRegion(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, + Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); - for (const auto& surface : GetSurfacesInRegion(addr, size)) { - Unregister(surface); - } - } + /// Notify the cache that a new frame has been queued + void TickFrame(); - void OnCPUWrite(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + /// Return an unique mutually exclusive lock 
for the cache + [[nodiscard]] std::unique_lock AcquireLock(); - for (const auto& surface : GetSurfacesInRegion(addr, size)) { - if (surface->IsMemoryMarked()) { - UnmarkMemory(surface); - surface->SetSyncPending(true); - marked_for_unregister.emplace_back(surface); - } - } - } + /// Return a constant reference to the given image view id + [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; - void SyncGuestHost() { - std::lock_guard lock{mutex}; + /// Return a reference to the given image view id + [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; - for (const auto& surface : marked_for_unregister) { - if (surface->IsRegistered()) { - surface->SetSyncPending(false); - Unregister(surface); - } - } - marked_for_unregister.clear(); - } + /// Fill image_view_ids with the graphics images in indices + void FillGraphicsImageViews(std::span indices, + std::span image_view_ids); - /** - * Guarantees that rendertargets don't unregister themselves if the - * collide. Protection is currently only done on 3D slices. 
- */ - void GuardRenderTargets(bool new_guard) { - guard_render_targets = new_guard; - } + /// Fill image_view_ids with the compute images in indices + void FillComputeImageViews(std::span indices, std::span image_view_ids); - void GuardSamplers(bool new_guard) { - guard_samplers = new_guard; - } + /// Get the sampler from the graphics descriptor table in the specified index + Sampler* GetGraphicsSampler(u32 index); - void FlushRegion(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + /// Get the sampler from the compute descriptor table in the specified index + Sampler* GetComputeSampler(u32 index); - auto surfaces = GetSurfacesInRegion(addr, size); - if (surfaces.empty()) { - return; - } - std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) { - return a->GetModificationTick() < b->GetModificationTick(); - }); - for (const auto& surface : surfaces) { - mutex.unlock(); - FlushSurface(surface); - mutex.lock(); - } - } + /// Refresh the state for graphics image view and sampler descriptors + void SynchronizeGraphicsDescriptors(); - bool MustFlushRegion(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + /// Refresh the state for compute image view and sampler descriptors + void SynchronizeComputeDescriptors(); - const auto surfaces = GetSurfacesInRegion(addr, size); - return std::any_of(surfaces.cbegin(), surfaces.cend(), - [](const TSurface& surface) { return surface->IsModified(); }); - } + /// Update bound render targets and upload memory if necessary + /// @param is_clear True when the render targets are being used for clears + void UpdateRenderTargets(bool is_clear); - TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Sampler& entry) { - std::lock_guard lock{mutex}; - const auto gpu_addr{tic.Address()}; - if (!gpu_addr) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } + /// Find a framebuffer with the currently bound render targets + /// 
UpdateRenderTargets should be called before this + Framebuffer* GetFramebuffer(); - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } + /// Mark images in a range as modified from the CPU + void WriteMemory(VAddr cpu_addr, size_t size); - if (!IsTypeCompatible(tic.texture_type, entry)) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } + /// Download contents of host images to guest memory in a region + void DownloadMemory(VAddr cpu_addr, size_t size); - const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; - const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); - if (guard_samplers) { - sampled_textures.push_back(surface); - } - return view; - } + /// Remove images in a region + void UnmapMemory(VAddr cpu_addr, size_t size); - TView GetImageSurface(const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Image& entry) { - std::lock_guard lock{mutex}; - const auto gpu_addr{tic.Address()}; - if (!gpu_addr) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } - const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; - const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); - if (guard_samplers) { - sampled_textures.push_back(surface); - } - return view; - } + /// Blit an image with the given parameters + void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy); - bool TextureBarrier() { - const bool any_rt = - std::any_of(sampled_textures.begin(), sampled_textures.end(), - [](const auto& surface) { return surface->IsRenderTarget(); }); - 
sampled_textures.clear(); - return any_rt; - } + /// Invalidate the contents of the color buffer index + /// These contents become unspecified, the cache can assume aggressive optimizations. + void InvalidateColorBuffer(size_t index); - TView GetDepthBufferSurface(bool preserve_contents) { - std::lock_guard lock{mutex}; - auto& dirty = maxwell3d.dirty; - if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { - return depth_buffer.view; - } - dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; + /// Invalidate the contents of the depth buffer + /// These contents become unspecified, the cache can assume aggressive optimizations. + void InvalidateDepthBuffer(); - const auto& regs{maxwell3d.regs}; - const auto gpu_addr{regs.zeta.Address()}; - if (!gpu_addr || !regs.zeta_enable) { - SetEmptyDepthBuffer(); - return {}; - } - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - SetEmptyDepthBuffer(); - return {}; - } - const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)}; - auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); - if (depth_buffer.target) - depth_buffer.target->MarkAsRenderTarget(false, NO_RT); - depth_buffer.target = surface_view.first; - depth_buffer.view = surface_view.second; - if (depth_buffer.target) - depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); - return surface_view.second; - } - - TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { - std::lock_guard lock{mutex}; - ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); - if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) { - return render_targets[index].view; - } - maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false; + /// Try to find a cached image view in the given CPU address + [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); - const auto& regs{maxwell3d.regs}; - if (index >= regs.rt_control.count || 
regs.rt[index].Address() == 0 || - regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - SetEmptyColorBuffer(index); - return {}; - } + /// Return true when there are uncommitted images to be downloaded + [[nodiscard]] bool HasUncommittedFlushes() const noexcept; - const auto& config{regs.rt[index]}; - const auto gpu_addr{config.Address()}; - if (!gpu_addr) { - SetEmptyColorBuffer(index); - return {}; - } + /// Return true when the caller should wait for async downloads + [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - SetEmptyColorBuffer(index); - return {}; - } + /// Commit asynchronous downloads + void CommitAsyncFlushes(); + + /// Pop asynchronous downloads + void PopAsyncFlushes(); + + /// Return true when a CPU region is modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); - auto surface_view = - GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index), - preserve_contents, true); - if (render_targets[index].target) { - auto& surface = render_targets[index].target; - surface->MarkAsRenderTarget(false, NO_RT); - const auto& cr_params = surface->GetSurfaceParams(); - if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) { - AsyncFlushSurface(surface); +private: + /// Iterate over all page indices in a range + template + static void ForEachPage(VAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; + const u64 page_end = (addr + size - 1) >> PAGE_SHIFT; + for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); } } - render_targets[index].target = surface_view.first; - render_targets[index].view = surface_view.second; - if (render_targets[index].target) - render_targets[index].target->MarkAsRenderTarget(true, 
static_cast(index)); - return surface_view.second; } - void MarkColorBufferInUse(std::size_t index) { - if (auto& render_target = render_targets[index].target) { - render_target->MarkAsModified(true, Tick()); - } - } + /// Fills image_view_ids in the image views in indices + void FillImageViews(DescriptorTable& table, + std::span cached_image_view_ids, std::span indices, + std::span image_view_ids); - void MarkDepthBufferInUse() { - if (depth_buffer.target) { - depth_buffer.target->MarkAsModified(true, Tick()); - } - } + /// Find or create an image view in the guest descriptor table + ImageViewId VisitImageView(DescriptorTable& table, + std::span cached_image_view_ids, u32 index); - void SetEmptyDepthBuffer() { - if (depth_buffer.target == nullptr) { - return; - } - depth_buffer.target->MarkAsRenderTarget(false, NO_RT); - depth_buffer.target = nullptr; - depth_buffer.view = nullptr; - } + /// Find or create a framebuffer with the given render target parameters + FramebufferId GetFramebufferId(const RenderTargets& key); - void SetEmptyColorBuffer(std::size_t index) { - if (render_targets[index].target == nullptr) { - return; - } - render_targets[index].target->MarkAsRenderTarget(false, NO_RT); - render_targets[index].target = nullptr; - render_targets[index].view = nullptr; - } - - void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, - const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const Tegra::Engines::Fermi2D::Config& copy_config) { - std::lock_guard lock{mutex}; - SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config); - SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); - const GPUVAddr src_gpu_addr = src_config.Address(); - const GPUVAddr dst_gpu_addr = dst_config.Address(); - DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); - - const std::optional dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr); - const std::optional src_cpu_addr = 
gpu_memory.GpuToCpuAddress(src_gpu_addr); - std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); - TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; - ImageBlit(src_surface, dst_surface.second, copy_config); - dst_surface.first->MarkAsModified(true, Tick()); - } - - TSurface TryFindFramebufferSurface(VAddr addr) const { - if (!addr) { - return nullptr; - } - const VAddr page = addr >> registry_page_bits; - const auto it = registry.find(page); - if (it == registry.end()) { - return nullptr; - } - const auto& list = it->second; - const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { - return surface->GetCpuAddr() == addr; - }); - return found != list.end() ? *found : nullptr; - } + /// Refresh the contents (pixel data) of an image + void RefreshContents(Image& image); - u64 Tick() { - return ++ticks; - } + /// Upload data from guest to an image + template + void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset); - void CommitAsyncFlushes() { - committed_flushes.push_back(uncommitted_flushes); - uncommitted_flushes.reset(); - } + /// Find or create an image view from a guest descriptor + [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); - bool HasUncommittedFlushes() const { - return uncommitted_flushes != nullptr; - } + /// Create a new image view from a guest descriptor + [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); - bool ShouldWaitAsyncFlushes() const { - return !committed_flushes.empty() && committed_flushes.front() != nullptr; - } + /// Find or create an image from the given parameters + [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options = RelaxedOptions{}); - void PopAsyncFlushes() { - if (committed_flushes.empty()) { - return; - } - auto& flush_list = committed_flushes.front(); - if (!flush_list) { - committed_flushes.pop_front(); - 
return; - } - for (TSurface& surface : *flush_list) { - FlushSurface(surface); - } - committed_flushes.pop_front(); - } + /// Find an image from the given parameters + [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options); -protected: - explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, - bool is_astc_supported_) - : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, - gpu_memory{gpu_memory_} { - for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { - SetEmptyColorBuffer(i); - } + /// Create an image from the given parameters + [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options); - SetEmptyDepthBuffer(); - staging_cache.SetSize(2); + /// Create a new image and join perfectly matching existing images + /// Remove joined images from the cache + [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); - const auto make_siblings = [this](PixelFormat a, PixelFormat b) { - siblings_table[static_cast(a)] = b; - siblings_table[static_cast(b)] = a; - }; - std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); - make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM); - make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT); - make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT); + /// Return a blit image pair from the given guest blit parameters + [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src); - sampled_textures.reserve(64); - } + /// Find or create a sampler from a guest descriptor sampler + [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); - ~TextureCache() = default; + /// Find or create an image view for the given color buffer index + 
[[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); - virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; + /// Find or create an image view for the depth buffer + [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); - virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, - const CopyParams& copy_params) = 0; + /// Find or create a view for a render target with the given image parameters + [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, + bool is_clear); - virtual void ImageBlit(TView& src_view, TView& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) = 0; + /// Iterates over all the images in a region calling func + template + void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); - // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture - // and reading it from a separate buffer. - virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; + /// Find or create an image view in the given image with the passed parameters + [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); - void ManageRenderTargetUnregister(TSurface& surface) { - auto& dirty = maxwell3d.dirty; - const u32 index = surface->GetRenderTarget(); - if (index == DEPTH_RT) { - dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; - } else { - dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true; - } - dirty.flags[VideoCommon::Dirty::RenderTargets] = true; + /// Register image in the page table + void RegisterImage(ImageId image); + + /// Unregister image from the page table + void UnregisterImage(ImageId image); + + /// Track CPU reads and writes for image + void TrackImage(ImageBase& image); + + /// Stop tracking CPU reads and writes for image + void UntrackImage(ImageBase& image); + + /// Delete image from the cache + void DeleteImage(ImageId image); + + /// Remove 
image views references from the cache + void RemoveImageViewReferences(std::span removed_views); + + /// Remove framebuffers using the given image views from the cache + void RemoveFramebuffers(std::span removed_views); + + /// Mark an image as modified from the GPU + void MarkModification(ImageBase& image) noexcept; + + /// Synchronize image aliases, copying data if needed + void SynchronizeAliases(ImageId image_id); + + /// Prepare an image to be used + void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); + + /// Prepare an image view to be used + void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); + + /// Execute copies from one image to the other, even if they are incompatible + void CopyImage(ImageId dst_id, ImageId src_id, std::span copies); + + /// Bind an image view as render target, downloading resources preemtively if needed + void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); + + /// Create a render target from a given image and image view parameters + [[nodiscard]] std::pair RenderTargetFromImage( + ImageId, const ImageViewInfo& view_info); + + /// Returns true if the current clear parameters clear the whole image of a given image view + [[nodiscard]] bool IsFullClear(ImageViewId id); + + Runtime& runtime; + VideoCore::RasterizerInterface& rasterizer; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + + DescriptorTable graphics_image_table{gpu_memory}; + DescriptorTable graphics_sampler_table{gpu_memory}; + std::vector graphics_sampler_ids; + std::vector graphics_image_view_ids; + + DescriptorTable compute_image_table{gpu_memory}; + DescriptorTable compute_sampler_table{gpu_memory}; + std::vector compute_sampler_ids; + std::vector compute_image_view_ids; + + RenderTargets render_targets; + + std::mutex mutex; + + std::unordered_map image_views; + std::unordered_map samplers; + std::unordered_map 
framebuffers; + + std::unordered_map, IdentityHash> page_table; + + bool has_deleted_images = false; + + SlotVector slot_images; + SlotVector slot_image_views; + SlotVector slot_image_allocs; + SlotVector slot_samplers; + SlotVector slot_framebuffers; + + // TODO: This data structure is not optimal and it should be reworked + std::vector uncommitted_downloads; + std::queue> committed_downloads; + + static constexpr size_t TICKS_TO_DESTROY = 6; + DelayedDestructionRing sentenced_images; + DelayedDestructionRing sentenced_image_view; + DelayedDestructionRing sentenced_framebuffers; + + std::unordered_map image_allocs_table; + + u64 modification_tick = 0; + u64 frame_tick = 0; +}; + +template +TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_) + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { + // Configure null sampler + TSCEntry sampler_descriptor{}; + sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); + sampler_descriptor.cubemap_anisotropy.Assign(1); + + // Make sure the first index is reserved for the null resources + // This way the null resource becomes a compile time constant + void(slot_image_views.insert(runtime, NullImageParams{})); + void(slot_samplers.insert(runtime, sampler_descriptor)); +} + +template +void TextureCache

::TickFrame() { + // Tick sentenced resources in this order to ensure they are destroyed in the right order + sentenced_images.Tick(); + sentenced_framebuffers.Tick(); + sentenced_image_view.Tick(); + ++frame_tick; +} + +template +std::unique_lock TextureCache

::AcquireLock() { + return std::unique_lock{mutex}; +} + +template +const typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) const noexcept { + return slot_image_views[id]; +} + +template +typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { + return slot_image_views[id]; +} + +template +void TextureCache

::FillGraphicsImageViews(std::span indices, + std::span image_view_ids) { + FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); +} + +template +void TextureCache

::FillComputeImageViews(std::span indices, + std::span image_view_ids) { + FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); +} + +template +typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { + [[unlikely]] if (index > graphics_sampler_table.Limit()) { + LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = graphics_sampler_table.Read(index); + SamplerId& id = graphics_sampler_ids[index]; + [[unlikely]] if (is_new) { + id = FindSampler(descriptor); } + return &slot_samplers[id]; +} - void Register(TSurface surface) { - const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const std::size_t size = surface->GetSizeInBytes(); - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", - gpu_addr); - return; - } - surface->SetCpuAddr(*cpu_addr); - RegisterInnerCache(surface); - surface->MarkAsRegistered(true); - surface->SetMemoryMarked(true); - rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); +template +typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { + [[unlikely]] if (index > compute_sampler_table.Limit()) { + LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = compute_sampler_table.Read(index); + SamplerId& id = compute_sampler_ids[index]; + [[unlikely]] if (is_new) { + id = FindSampler(descriptor); } + return &slot_samplers[id]; +} - void UnmarkMemory(TSurface surface) { - if (!surface->IsMemoryMarked()) { - return; - } - const std::size_t size = surface->GetSizeInBytes(); - const VAddr cpu_addr = surface->GetCpuAddr(); - rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); - surface->SetMemoryMarked(false); +template +void TextureCache

::SynchronizeGraphicsDescriptors() { + using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; + const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; + const u32 tic_limit = maxwell3d.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; + if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { + graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); } + if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { + graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} - void Unregister(TSurface surface) { - if (guard_render_targets && surface->IsProtected()) { - return; - } - if (!guard_render_targets && surface->IsRenderTarget()) { - ManageRenderTargetUnregister(surface); - } - UnmarkMemory(surface); - if (surface->IsSyncPending()) { - marked_for_unregister.remove(surface); - surface->SetSyncPending(false); - } - UnregisterInnerCache(surface); - surface->MarkAsRegistered(false); - ReserveSurface(surface->GetSurfaceParams(), surface); +template +void TextureCache

::SynchronizeComputeDescriptors() { + const bool linked_tsc = kepler_compute.launch_description.linked_tsc; + const u32 tic_limit = kepler_compute.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; + const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); + if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { + compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); } + if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { + compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} - TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { - if (const auto surface = TryGetReservedSurface(params); surface) { - surface->SetGpuAddr(gpu_addr); - return surface; - } - // No reserved surface available, create a new one and reserve it - auto new_surface{CreateSurface(gpu_addr, params)}; - return new_surface; +template +void TextureCache

::UpdateRenderTargets(bool is_clear) { + using namespace VideoCommon::Dirty; + auto& flags = maxwell3d.dirty.flags; + if (!flags[Dirty::RenderTargets]) { + return; } + flags[Dirty::RenderTargets] = false; - const bool is_astc_supported; + // Render target control is used on all render targets, so force look ups when this one is up + const bool force = flags[Dirty::RenderTargetControl]; + flags[Dirty::RenderTargetControl] = false; -private: - enum class RecycleStrategy : u32 { - Ignore = 0, - Flush = 1, - BufferCopy = 3, - }; + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + if (flags[Dirty::ColorBuffer0 + index] || force) { + flags[Dirty::ColorBuffer0 + index] = false; + BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); + } + PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + } + if (flags[Dirty::ZetaBuffer] || force) { + flags[Dirty::ZetaBuffer] = false; + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); + } + const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; + PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); - enum class DeductionType : u32 { - DeductionComplete, - DeductionIncomplete, - DeductionFailed, + for (size_t index = 0; index < NUM_RT; ++index) { + render_targets.draw_buffers[index] = static_cast(maxwell3d.regs.rt_control.Map(index)); + } + render_targets.size = Extent2D{ + maxwell3d.regs.render_area.width, + maxwell3d.regs.render_area.height, }; +} - struct Deduction { - DeductionType type{DeductionType::DeductionFailed}; - TSurface surface{}; +template +typename P::Framebuffer* TextureCache

::GetFramebuffer() { + return &slot_framebuffers[GetFramebufferId(render_targets)]; +} - bool Failed() const { - return type == DeductionType::DeductionFailed; - } +template +void TextureCache

::FillImageViews(DescriptorTable& table, + std::span cached_image_view_ids, + std::span indices, + std::span image_view_ids) { + ASSERT(indices.size() <= image_view_ids.size()); + do { + has_deleted_images = false; + std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { + return VisitImageView(table, cached_image_view_ids, index); + }); + } while (has_deleted_images); +} - bool Incomplete() const { - return type == DeductionType::DeductionIncomplete; - } +template +ImageViewId TextureCache

::VisitImageView(DescriptorTable& table, + std::span cached_image_view_ids, + u32 index) { + if (index > table.Limit()) { + LOG_ERROR(HW_GPU, "Invalid image view index={}", index); + return NULL_IMAGE_VIEW_ID; + } + const auto [descriptor, is_new] = table.Read(index); + ImageViewId& image_view_id = cached_image_view_ids[index]; + if (is_new) { + image_view_id = FindImageView(descriptor); + } + if (image_view_id != NULL_IMAGE_VIEW_ID) { + PrepareImageView(image_view_id, false, false); + } + return image_view_id; +} - bool IsDepth() const { - return surface->GetSurfaceParams().IsPixelFormatZeta(); - } - }; +template +FramebufferId TextureCache

::GetFramebufferId(const RenderTargets& key) { + const auto [pair, is_new] = framebuffers.try_emplace(key); + FramebufferId& framebuffer_id = pair->second; + if (!is_new) { + return framebuffer_id; + } + std::array color_buffers; + std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), + [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); + ImageView* const depth_buffer = + key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; + framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); + return framebuffer_id; +} - /** - * Takes care of selecting a proper strategy to deal with a texture recycle. - * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters on the new surface. - * @param gpu_addr The starting address of the new surface. - * @param untopological Indicates to the recycler that the texture has no way - * to match the overlaps due to topological reasons. - **/ - RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { - if (Settings::IsGPULevelExtreme()) { - return RecycleStrategy::Flush; - } - // 3D Textures decision - if (params.target == SurfaceTarget::Texture3D) { - return RecycleStrategy::Flush; - } - for (const auto& s : overlaps) { - const auto& s_params = s->GetSurfaceParams(); - if (s_params.target == SurfaceTarget::Texture3D) { - return RecycleStrategy::Flush; - } - } - // Untopological decision - if (untopological == MatchTopologyResult::CompressUnmatch) { - return RecycleStrategy::Flush; - } - if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { - return RecycleStrategy::Flush; - } - return RecycleStrategy::Ignore; - } - - /** - * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented - * strategies: Ignore and Flush. 
- * - * - Ignore: Just unregisters all the overlaps and loads the new texture. - * - Flush: Flushes all the overlaps into memory and loads the new surface from that data. - * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters for the new surface. - * @param gpu_addr The starting address of the new surface. - * @param preserve_contents Indicates that the new surface should be loaded from memory or left - * blank. - * @param untopological Indicates to the recycler that the texture has no way to match the - * overlaps due to topological reasons. - **/ - std::pair RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, const bool preserve_contents, - const MatchTopologyResult untopological) { - const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); - for (auto& surface : overlaps) { - Unregister(surface); - } - switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { - case RecycleStrategy::Ignore: { - return InitializeSurface(gpu_addr, params, do_load); - } - case RecycleStrategy::Flush: { - std::sort(overlaps.begin(), overlaps.end(), - [](const TSurface& a, const TSurface& b) -> bool { - return a->GetModificationTick() < b->GetModificationTick(); - }); - for (auto& surface : overlaps) { - FlushSurface(surface); - } - return InitializeSurface(gpu_addr, params, preserve_contents); +template +void TextureCache

::WriteMemory(VAddr cpu_addr, size_t size) { + ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { + if (True(image.flags & ImageFlagBits::CpuModified)) { + return; } - case RecycleStrategy::BufferCopy: { - auto new_surface = GetUncachedSurface(gpu_addr, params); - BufferCopy(overlaps[0], new_surface); - return {new_surface, new_surface->GetMainView()}; + image.flags |= ImageFlagBits::CpuModified; + UntrackImage(image); + }); +} + +template +void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { + std::vector images; + ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { + // Skip images that were not modified from the GPU + if (False(image.flags & ImageFlagBits::GpuModified)) { + return; } - default: { - UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); - return InitializeSurface(gpu_addr, params, do_load); + // Skip images that .are. modified from the CPU + // We don't want to write sensitive data from the guest + if (True(image.flags & ImageFlagBits::CpuModified)) { + return; } + if (image.info.num_samples > 1) { + LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); + return; } + image.flags &= ~ImageFlagBits::GpuModified; + images.push_back(image_id); + }); + if (images.empty()) { + return; + } + std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { + return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; + }); + for (const ImageId image_id : images) { + Image& image = slot_images[image_id]; + auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, 0, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span()); } +} - /** - * Takes a single surface and recreates into another that may differ in - * format, target or width alignment. - * - * @param current_surface The registered surface in the cache which we want to convert. - * @param params The new surface params which we'll use to recreate the surface. - * @param is_render Whether or not the surface is a render target. 
- **/ - std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params, - bool is_render) { - const auto gpu_addr = current_surface->GetGpuAddr(); - const auto& cr_params = current_surface->GetSurfaceParams(); - TSurface new_surface; - if (cr_params.pixel_format != params.pixel_format && !is_render && - GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) { - SurfaceParams new_params = params; - new_params.pixel_format = cr_params.pixel_format; - new_params.type = cr_params.type; - new_surface = GetUncachedSurface(gpu_addr, new_params); - } else { - new_surface = GetUncachedSurface(gpu_addr, params); - } - const SurfaceParams& final_params = new_surface->GetSurfaceParams(); - if (cr_params.type != final_params.type) { - if (Settings::IsGPULevelExtreme()) { - BufferCopy(current_surface, new_surface); - } - } else { - std::vector bricks = current_surface->BreakDown(final_params); - for (auto& brick : bricks) { - TryCopyImage(current_surface, new_surface, brick); - } - } - Unregister(current_surface); - Register(new_surface); - new_surface->MarkAsModified(current_surface->IsModified(), Tick()); - return {new_surface, new_surface->GetMainView()}; - } - - /** - * Takes a single surface and checks with the new surface's params if it's an exact - * match, we return the main view of the registered surface. If its formats don't - * match, we rebuild the surface. We call this last method a `Mirage`. If formats - * match but the targets don't, we create an overview View of the registered surface. - * - * @param current_surface The registered surface in the cache which we want to convert. - * @param params The new surface params which we want to check. - * @param is_render Whether or not the surface is a render target. 
- **/ - std::pair ManageStructuralMatch(TSurface current_surface, - const SurfaceParams& params, bool is_render) { - const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); - const bool matches_target = current_surface->MatchTarget(params.target); - const auto match_check = [&]() -> std::pair { - if (matches_target) { - return {current_surface, current_surface->GetMainView()}; - } - return {current_surface, current_surface->EmplaceOverview(params)}; - }; - if (!is_mirage) { - return match_check(); - } - if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) { - return match_check(); - } - return RebuildSurface(current_surface, params, is_render); - } - - /** - * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate - * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps - * of the new surface, if they all match we end up recreating a surface for them, - * else we return nothing. - * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters on the new surface. - * @param gpu_addr The starting address of the new surface. - **/ - std::optional> TryReconstructSurface(VectorSurface& overlaps, - const SurfaceParams& params, - GPUVAddr gpu_addr) { - if (params.target == SurfaceTarget::Texture3D) { - return std::nullopt; - } - const auto test_modified = [](TSurface& surface) { return surface->IsModified(); }; - TSurface new_surface = GetUncachedSurface(gpu_addr, params); +template +void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { + std::vector deleted_images; + ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image); + } + UnregisterImage(id); + DeleteImage(id); + } +} - if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) { - LoadSurface(new_surface); - for (const auto& surface : overlaps) { - Unregister(surface); - } - Register(new_surface); - return {{new_surface, new_surface->GetMainView()}}; - } +template +void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy) { + const BlitImages images = GetBlitImages(dst, src); + const ImageId dst_id = images.dst_id; + const ImageId src_id = images.src_id; + PrepareImage(src_id, false, false); + PrepareImage(dst_id, true, false); + + ImageBase& dst_image = slot_images[dst_id]; + const ImageBase& src_image = slot_images[src_id]; + + // TODO: Deduplicate + const std::optional dst_base = dst_image.TryFindBase(dst.Address()); + const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); + const std::array src_region{ + Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, + Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, + }; - std::size_t passed_tests = 0; - for (auto& surface : overlaps) { - const SurfaceParams& src_params = surface->GetSurfaceParams(); - const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; - if (!mipmap_layer) { - continue; - } - const auto [base_layer, base_mipmap] = *mipmap_layer; - if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { - continue; - } - ++passed_tests; - - // Copy all mipmaps and layers - const u32 block_width = params.GetDefaultBlockWidth(); - const u32 block_height = params.GetDefaultBlockHeight(); - for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) { - const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); - const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); - if (width < block_width || height < block_height) { - // Current 
APIs forbid copying small compressed textures, avoid errors - break; - } - const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, - src_params.depth); - TryCopyImage(surface, new_surface, copy_params); - } - } - if (passed_tests == 0) { - return std::nullopt; - } - if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { - // In Accurate GPU all tests should pass, else we recycle - return std::nullopt; - } + const std::optional src_base = src_image.TryFindBase(src.Address()); + const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; + const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); + const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); + const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); + const std::array dst_region{ + Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, + Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, + }; - const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified); - for (const auto& surface : overlaps) { - Unregister(surface); - } + // Always call this after src_framebuffer_id was queried, as the address might be invalidated. 
+ Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + if constexpr (FRAMEBUFFER_BLITS) { + // OpenGL blits from framebuffers, not images + Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; + runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, + copy.filter, copy.operation); + } else { + // Vulkan can blit images, but it lacks format reinterpretations + // Provide a framebuffer in case it's necessary + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, + copy.operation); + } +} - new_surface->MarkAsModified(modified, Tick()); - Register(new_surface); - return {{new_surface, new_surface->GetMainView()}}; - } - - /** - * Takes care of managing 3D textures and its slices. Does HLE methods for reconstructing the 3D - * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of - * the HLE methods. - * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters on the new surface. - * @param gpu_addr The starting address of the new surface. - * @param cpu_addr The starting address of the new surface on physical memory. - * @param preserve_contents Indicates that the new surface should be loaded from memory or - * left blank. 
- */ - std::optional> Manage3DSurfaces(VectorSurface& overlaps, - const SurfaceParams& params, - GPUVAddr gpu_addr, VAddr cpu_addr, - bool preserve_contents) { - if (params.target != SurfaceTarget::Texture3D) { - for (const auto& surface : overlaps) { - if (!surface->MatchTarget(params.target)) { - if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { - if (Settings::IsGPULevelExtreme()) { - return std::nullopt; - } - Unregister(surface); - return InitializeSurface(gpu_addr, params, preserve_contents); - } - return std::nullopt; - } - if (surface->GetCpuAddr() != cpu_addr) { - continue; - } - if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { - return std::make_pair(surface, surface->GetMainView()); - } - } - return InitializeSurface(gpu_addr, params, preserve_contents); - } +template +void TextureCache

::InvalidateColorBuffer(size_t index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + color_buffer_id = FindColorBuffer(index, false); + if (!color_buffer_id) { + LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index); + return; + } + // When invalidating a color buffer, the old contents are no longer relevant + ImageView& color_buffer = slot_image_views[color_buffer_id]; + Image& image = slot_images[color_buffer.image_id]; + image.flags &= ~ImageFlagBits::CpuModified; + image.flags &= ~ImageFlagBits::GpuModified; - if (params.num_levels > 1) { - // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach - return std::nullopt; - } + runtime.InvalidateColorBuffer(color_buffer, index); +} - if (overlaps.size() == 1) { - const auto& surface = overlaps[0]; - const SurfaceParams& overlap_params = surface->GetSurfaceParams(); - // Don't attempt to render to textures with more than one level for now - // The texture has to be to the right or the sample address if we want to render to it - if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) { - const u32 offset = static_cast(cpu_addr - surface->GetCpuAddr()); - const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); - if (slice < overlap_params.depth) { - auto view = surface->Emplace3DView(slice, params.depth, 0, 1); - return std::make_pair(std::move(surface), std::move(view)); - } - } - } +template +void TextureCache

::InvalidateDepthBuffer() { + ImageViewId& depth_buffer_id = render_targets.depth_buffer_id; + depth_buffer_id = FindDepthBuffer(false); + if (!depth_buffer_id) { + LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer"); + return; + } + // When invalidating the depth buffer, the old contents are no longer relevant + ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id]; + image.flags &= ~ImageFlagBits::CpuModified; + image.flags &= ~ImageFlagBits::GpuModified; - TSurface new_surface = GetUncachedSurface(gpu_addr, params); - bool modified = false; + ImageView& depth_buffer = slot_image_views[depth_buffer_id]; + runtime.InvalidateDepthBuffer(depth_buffer); +} - for (auto& surface : overlaps) { - const SurfaceParams& src_params = surface->GetSurfaceParams(); - if (src_params.target != SurfaceTarget::Texture2D || - src_params.height != params.height || - src_params.block_depth != params.block_depth || - src_params.block_height != params.block_height) { - return std::nullopt; - } - modified |= surface->IsModified(); - - const u32 offset = static_cast(surface->GetCpuAddr() - cpu_addr); - const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); - const u32 width = params.width; - const u32 height = params.height; - const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); - TryCopyImage(surface, new_surface, copy_params); +template +typename P::ImageView* TextureCache

::TryFindFramebufferImageView(VAddr cpu_addr) { + // TODO: Properly implement this + const auto it = page_table.find(cpu_addr >> PAGE_SHIFT); + if (it == page_table.end()) { + return nullptr; + } + const auto& image_ids = it->second; + for (const ImageId image_id : image_ids) { + const ImageBase& image = slot_images[image_id]; + if (image.cpu_addr != cpu_addr) { + continue; } - for (const auto& surface : overlaps) { - Unregister(surface); + if (image.image_view_ids.empty()) { + continue; } - new_surface->MarkAsModified(modified, Tick()); - Register(new_surface); - - TView view = new_surface->GetMainView(); - return std::make_pair(std::move(new_surface), std::move(view)); - } - - /** - * Gets the starting address and parameters of a candidate surface and tries - * to find a matching surface within the cache. This is done in 3 big steps: - * - * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. - * - * 2. Check if there are any overlaps at all, if there are none, we just load the texture from - * memory else we move to step 3. - * - * 3. Consists of figuring out the relationship between the candidate texture and the - * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If - * there's many, we just try to reconstruct a new surface out of them based on the - * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we - * have to check if the candidate is a view (layer/mipmap) of the overlap or if the - * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct - * a new surface. - * - * @param gpu_addr The starting address of the candidate surface. - * @param params The parameters on the candidate surface. - * @param preserve_contents Indicates that the new surface should be loaded from memory or - * left blank. - * @param is_render Whether or not the surface is a render target. 
- **/ - std::pair GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, - const SurfaceParams& params, bool preserve_contents, - bool is_render) { - // Step 1 - // Check Level 1 Cache for a fast structural match. If candidate surface - // matches at certain level we are pretty much done. - if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) { - TSurface& current_surface = iter->second; - const auto topological_result = current_surface->MatchesTopology(params); - if (topological_result != MatchTopologyResult::FullMatch) { - VectorSurface overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - topological_result); - } + return &slot_image_views[image.image_view_ids.at(0)]; + } + return nullptr; +} - const auto struct_result = current_surface->MatchesStructure(params); - if (struct_result != MatchStructureResult::None) { - const auto& old_params = current_surface->GetSurfaceParams(); - const bool not_3d = params.target != SurfaceTarget::Texture3D && - old_params.target != SurfaceTarget::Texture3D; - if (not_3d || current_surface->MatchTarget(params.target)) { - if (struct_result == MatchStructureResult::FullMatch) { - return ManageStructuralMatch(current_surface, params, is_render); - } else { - return RebuildSurface(current_surface, params, is_render); - } - } - } - } +template +bool TextureCache

::HasUncommittedFlushes() const noexcept { + return !uncommitted_downloads.empty(); +} - // Step 2 - // Obtain all possible overlaps in the memory region - const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)}; +template +bool TextureCache

::ShouldWaitAsyncFlushes() const noexcept { + return !committed_downloads.empty() && !committed_downloads.front().empty(); +} - // If none are found, we are done. we just load the surface and create it. - if (overlaps.empty()) { - return InitializeSurface(gpu_addr, params, preserve_contents); - } +template +void TextureCache

::CommitAsyncFlushes() { + // This is intentionally passing the value by copy + committed_downloads.push(uncommitted_downloads); + uncommitted_downloads.clear(); +} - // Step 3 - // Now we need to figure the relationship between the texture and its overlaps - // we do a topological test to ensure we can find some relationship. If it fails - // immediately recycle the texture - for (const auto& surface : overlaps) { - const auto topological_result = surface->MatchesTopology(params); - if (topological_result != MatchTopologyResult::FullMatch) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - topological_result); - } - } +template +void TextureCache

::PopAsyncFlushes() { + if (committed_downloads.empty()) { + return; + } + const std::span download_ids = committed_downloads.front(); + if (download_ids.empty()) { + committed_downloads.pop(); + return; + } + size_t total_size_bytes = 0; + for (const ImageId image_id : download_ids) { + total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + } + auto download_map = runtime.MapDownloadBuffer(total_size_bytes); + size_t buffer_offset = 0; + for (const ImageId image_id : download_ids) { + Image& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, buffer_offset, copies); + buffer_offset += image.unswizzled_size_bytes; + } + // Wait for downloads to finish + runtime.Finish(); + + buffer_offset = 0; + const std::span download_span = download_map.Span(); + for (const ImageId image_id : download_ids) { + const ImageBase& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + const std::span image_download_span = download_span.subspan(buffer_offset); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); + buffer_offset += image.unswizzled_size_bytes; + } + committed_downloads.pop(); +} - // Manage 3D textures - if (params.block_depth > 0) { - auto surface = - Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); - if (surface) { - return *surface; - } +template +bool TextureCache

::IsRegionGpuModified(VAddr addr, size_t size) { + bool is_modified = false; + ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { + if (False(image.flags & ImageFlagBits::GpuModified)) { + return false; } + is_modified = true; + return true; + }); + return is_modified; +} - // Split cases between 1 overlap or many. - if (overlaps.size() == 1) { - TSurface current_surface = overlaps[0]; - // First check if the surface is within the overlap. If not, it means - // two things either the candidate surface is a supertexture of the overlap - // or they don't match in any known way. - if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { - const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr); - if (view) { - return *view; - } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); - } - // Now we check if the candidate is a mipmap/layer of the overlap - std::optional view = - current_surface->EmplaceView(params, gpu_addr, candidate_size); - if (view) { - const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); - if (is_mirage) { - // On a mirage view, we need to recreate the surface under this new view - // and then obtain a view again. 
- SurfaceParams new_params = current_surface->GetSurfaceParams(); - const u32 wh = SurfaceParams::ConvertWidth( - new_params.width, new_params.pixel_format, params.pixel_format); - const u32 hh = SurfaceParams::ConvertHeight( - new_params.height, new_params.pixel_format, params.pixel_format); - new_params.width = wh; - new_params.height = hh; - new_params.pixel_format = params.pixel_format; - std::pair pair = - RebuildSurface(current_surface, new_params, is_render); - std::optional mirage_view = - pair.first->EmplaceView(params, gpu_addr, candidate_size); - if (mirage_view) - return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); - } - return {current_surface, *view}; - } - } else { - // If there are many overlaps, odds are they are subtextures of the candidate - // surface. We try to construct a new surface based on the candidate parameters, - // using the overlaps. If a single overlap fails, this will fail. - std::optional> view = - TryReconstructSurface(overlaps, params, gpu_addr); - if (view) { - return *view; - } - } - // We failed all the tests, recycle the overlaps into a new texture. - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); - } - - /** - * Gets the starting address and parameters of a candidate surface and tries to find a - * matching surface within the cache that's similar to it. If there are many textures - * or the texture found if entirely incompatible, it will fail. If no texture is found, the - * blit will be unsuccessful. - * - * @param gpu_addr The starting address of the candidate surface. - * @param params The parameters on the candidate surface. 
- **/ - Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - - if (!cpu_addr) { - Deduction result{}; - result.type = DeductionType::DeductionFailed; - return result; - } +template +void TextureCache

::RefreshContents(Image& image) { + if (False(image.flags & ImageFlagBits::CpuModified)) { + // Only upload modified images + return; + } + image.flags &= ~ImageFlagBits::CpuModified; + TrackImage(image); - if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { - TSurface& current_surface = iter->second; - const auto topological_result = current_surface->MatchesTopology(params); - if (topological_result != MatchTopologyResult::FullMatch) { - Deduction result{}; - result.type = DeductionType::DeductionFailed; - return result; - } - const auto struct_result = current_surface->MatchesStructure(params); - if (struct_result != MatchStructureResult::None && - current_surface->MatchTarget(params.target)) { - Deduction result{}; - result.type = DeductionType::DeductionComplete; - result.surface = current_surface; - return result; - } - } + if (image.info.num_samples > 1) { + LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); + return; + } + auto map = runtime.MapUploadBuffer(MapSizeBytes(image)); + UploadImageContents(image, map, 0); + runtime.InsertUploadMemoryBarrier(); +} - const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; +template +template +void TextureCache

::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { + const std::span mapped_span = map.Span().subspan(buffer_offset); + const GPUVAddr gpu_addr = image.gpu_addr; + + if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { + gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); + const auto uploads = FullUploadSwizzles(image.info); + runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); + } else if (True(image.flags & ImageFlagBits::Converted)) { + std::vector unswizzled_data(image.unswizzled_size_bytes); + auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); + ConvertImage(unswizzled_data, image.info, mapped_span, copies); + image.UploadMemory(map, buffer_offset, copies); + } else if (image.info.type == ImageType::Buffer) { + const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; + image.UploadMemory(map, buffer_offset, copies); + } else { + const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); + image.UploadMemory(map, buffer_offset, copies); + } +} - if (overlaps.empty()) { - Deduction result{}; - result.type = DeductionType::DeductionIncomplete; - return result; - } +template +ImageViewId TextureCache

::FindImageView(const TICEntry& config) { + if (!IsValidAddress(gpu_memory, config)) { + return NULL_IMAGE_VIEW_ID; + } + const auto [pair, is_new] = image_views.try_emplace(config); + ImageViewId& image_view_id = pair->second; + if (is_new) { + image_view_id = CreateImageView(config); + } + return image_view_id; +} - if (overlaps.size() > 1) { - Deduction result{}; - result.type = DeductionType::DeductionFailed; - return result; - } else { - Deduction result{}; - result.type = DeductionType::DeductionComplete; - result.surface = overlaps[0]; - return result; - } +template +ImageViewId TextureCache

::CreateImageView(const TICEntry& config) { + const ImageInfo info(config); + const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; + const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; } + ImageBase& image = slot_images[image_id]; + const SubresourceBase base = image.TryFindBase(config.Address()).value(); + ASSERT(base.level == 0); + const ImageViewInfo view_info(config, base.layer); + const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); + ImageViewBase& image_view = slot_image_views[image_view_id]; + image_view.flags |= ImageViewFlagBits::Strong; + image.flags |= ImageFlagBits::Strong; + return image_view_id; +} - /** - * Gets a null surface based on a target texture. - * @param target The target of the null surface. - */ - TView GetNullSurface(SurfaceTarget target) { - const u32 i_target = static_cast(target); - if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) { - return it->second->GetMainView(); - } - SurfaceParams params{}; - params.target = target; - params.is_tiled = false; - params.srgb_conversion = false; - params.is_layered = - target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray || - target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray; - params.block_width = 0; - params.block_height = 0; - params.block_depth = 0; - params.tile_width_spacing = 1; - params.width = 1; - params.height = 1; - params.depth = 1; - if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) { - params.depth = 6; - } - params.pitch = 4; - params.num_levels = 1; - params.emulated_levels = 1; - params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM; - params.type = VideoCore::Surface::SurfaceType::ColorTexture; - auto surface = CreateSurface(0ULL, params); - invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); - 
surface->UploadTexture(invalid_memory); - surface->MarkAsModified(false, Tick()); - invalid_cache.emplace(i_target, surface); - return surface->GetMainView(); - } - - /** - * Gets the a source and destination starting address and parameters, - * and tries to deduce if they are supposed to be depth textures. If so, their - * parameters are modified and fixed into so. - * - * @param src_params The parameters of the candidate surface. - * @param dst_params The parameters of the destination surface. - * @param src_gpu_addr The starting address of the candidate surface. - * @param dst_gpu_addr The starting address of the destination surface. - **/ - void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, - const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { - auto deduced_src = DeduceSurface(src_gpu_addr, src_params); - auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params); - if (deduced_src.Failed() || deduced_dst.Failed()) { - return; +template +ImageId TextureCache

::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { + return image_id; + } + return InsertImage(info, gpu_addr, options); +} + +template +ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + return ImageId{}; + } + ImageId image_id; + const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { + if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { + const bool strict_size = False(options & RelaxedOptions::Size) && + True(existing_image.flags & ImageFlagBits::Strong); + const ImageInfo& existing = existing_image.info; + if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && + existing.pitch == info.pitch && + IsPitchLinearSameSize(existing, info, strict_size) && + IsViewCompatible(existing.format, info.format)) { + image_id = existing_image_id; + return true; + } + } else if (IsSubresource(info, existing_image, gpu_addr, options)) { + image_id = existing_image_id; + return true; } + return false; + }; + ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); + return image_id; +} - const bool incomplete_src = deduced_src.Incomplete(); - const bool incomplete_dst = deduced_dst.Incomplete(); +template +ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); + const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); + const Image& image = slot_images[image_id]; + // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different + const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); + if (is_new) { + it->second = slot_image_allocs.insert(); + } + slot_image_allocs[it->second].images.push_back(image_id); + return image_id; +} - if (incomplete_src && incomplete_dst) { +template +ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { + ImageInfo new_info = info; + const size_t size_bytes = CalculateGuestSizeInBytes(new_info); + std::vector overlap_ids; + std::vector left_aliased_ids; + std::vector right_aliased_ids; + ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { + if (info.type != overlap.info.type) { return; } - - const bool any_incomplete = incomplete_src || incomplete_dst; - - if (!any_incomplete) { - if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) { - return; - } - } else { - if (incomplete_src && !(deduced_dst.IsDepth())) { - return; - } - - if (incomplete_dst && !(deduced_src.IsDepth())) { - return; + if (info.type == ImageType::Linear) { + if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { + // Alias linear images with the same pitch + left_aliased_ids.push_back(overlap_id); } + return; + } + const auto solution = ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, true); + if (solution) { + gpu_addr = solution->gpu_addr; + cpu_addr = solution->cpu_addr; + new_info.resources = solution->resources; + overlap_ids.push_back(overlap_id); + return; + } + static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; + const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); + if (IsSubresource(new_info, overlap, gpu_addr, options)) { + left_aliased_ids.push_back(overlap_id); + } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options)) { + right_aliased_ids.push_back(overlap_id); } + }); + const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); + Image& new_image = slot_images[new_image_id]; - const auto inherit_format = [](SurfaceParams& to, TSurface from) { - const SurfaceParams& params = from->GetSurfaceParams(); - to.pixel_format = params.pixel_format; - to.type = params.type; - }; - // Now we got the cases where one or both is Depth and the other is not known - if 
(!incomplete_src) { - inherit_format(src_params, deduced_src.surface); + // TODO: Only upload what we need + RefreshContents(new_image); + + for (const ImageId overlap_id : overlap_ids) { + Image& overlap = slot_images[overlap_id]; + if (overlap.info.num_samples != new_image.info.num_samples) { + LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); } else { - inherit_format(src_params, deduced_dst.surface); + const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); + const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); + runtime.CopyImage(new_image, overlap, copies); } - if (!incomplete_dst) { - inherit_format(dst_params, deduced_dst.surface); - } else { - inherit_format(dst_params, deduced_src.surface); + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap); } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } + ImageBase& new_image_base = new_image; + for (const ImageId aliased_id : right_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); + } + for (const ImageId aliased_id : left_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); } + RegisterImage(new_image_id); + return new_image_id; +} - std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, - bool preserve_contents) { - auto new_surface{GetUncachedSurface(gpu_addr, params)}; - Register(new_surface); - if (preserve_contents) { - LoadSurface(new_surface); - } - return {new_surface, new_surface->GetMainView()}; +template +typename TextureCache

::BlitImages TextureCache

::GetBlitImages( + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { + static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; + const GPUVAddr dst_addr = dst.Address(); + const GPUVAddr src_addr = src.Address(); + ImageInfo dst_info(dst); + ImageInfo src_info(src); + ImageId dst_id; + ImageId src_id; + do { + has_deleted_images = false; + dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); + src_id = FindImage(src_info, src_addr, FIND_OPTIONS); + const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; + const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; + DeduceBlitImages(dst_info, src_info, dst_image, src_image); + if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { + continue; + } + if (!dst_id) { + dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); + } + if (!src_id) { + src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); + } + } while (has_deleted_images); + return BlitImages{ + .dst_id = dst_id, + .src_id = src_id, + .dst_format = dst_info.format, + .src_format = src_info.format, + }; +} + +template +SamplerId TextureCache

::FindSampler(const TSCEntry& config) { + if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { + return NULL_SAMPLER_ID; + } + const auto [pair, is_new] = samplers.try_emplace(config); + if (is_new) { + pair->second = slot_samplers.insert(runtime, config); } + return pair->second; +} - void LoadSurface(const TSurface& surface) { - staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); - surface->LoadBuffer(gpu_memory, staging_cache); - surface->UploadTexture(staging_cache.GetBuffer(0)); - surface->MarkAsModified(false, Tick()); +template +ImageViewId TextureCache

::FindColorBuffer(size_t index, bool is_clear) { + const auto& regs = maxwell3d.regs; + if (index >= regs.rt_control.count) { + return ImageViewId{}; + } + const auto& rt = regs.rt[index]; + const GPUVAddr gpu_addr = rt.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; + } + if (rt.format == Tegra::RenderTargetFormat::NONE) { + return ImageViewId{}; } + const ImageInfo info(regs, index); + return FindRenderTargetView(info, gpu_addr, is_clear); +} - void FlushSurface(const TSurface& surface) { - if (!surface->IsModified()) { - return; - } - staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); - surface->DownloadTexture(staging_cache.GetBuffer(0)); - surface->FlushBuffer(gpu_memory, staging_cache); - surface->MarkAsModified(false, Tick()); - } - - void RegisterInnerCache(TSurface& surface) { - const VAddr cpu_addr = surface->GetCpuAddr(); - VAddr start = cpu_addr >> registry_page_bits; - const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; - l1_cache[cpu_addr] = surface; - while (start <= end) { - registry[start].push_back(surface); - start++; - } +template +ImageViewId TextureCache

::FindDepthBuffer(bool is_clear) { + const auto& regs = maxwell3d.regs; + if (!regs.zeta_enable) { + return ImageViewId{}; + } + const GPUVAddr gpu_addr = regs.zeta.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; } + const ImageInfo info(regs); + return FindRenderTargetView(info, gpu_addr, is_clear); +} - void UnregisterInnerCache(TSurface& surface) { - const VAddr cpu_addr = surface->GetCpuAddr(); - VAddr start = cpu_addr >> registry_page_bits; - const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; - l1_cache.erase(cpu_addr); - while (start <= end) { - auto& reg{registry[start]}; - reg.erase(std::find(reg.begin(), reg.end(), surface)); - start++; - } +template +ImageViewId TextureCache

::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, + bool is_clear) { + const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; + const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + Image& image = slot_images[image_id]; + const ImageViewType view_type = RenderTargetImageViewType(info); + SubresourceBase base; + if (image.info.type == ImageType::Linear) { + base = SubresourceBase{.level = 0, .layer = 0}; + } else { + base = image.TryFindBase(gpu_addr).value(); } + const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; + const SubresourceRange range{ + .base = base, + .extent = {.levels = 1, .layers = layers}, + }; + return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); +} - VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { - if (size == 0) { - return {}; +template +template +void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { + const auto it = page_table.find(page); + if (it == page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } } - const VAddr cpu_addr_end = cpu_addr + size; - const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; - VectorSurface surfaces; - for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { - const auto it = registry.find(start); - if (it == registry.end()) { + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { continue; } - for (auto& surface : it->second) { - if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { - continue; + if (!image.Overlaps(cpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; } - surface->MarkAsPicked(true); - surfaces.push_back(surface); + } else { + func(image_id, image); } } - for (auto& surface : surfaces) { - surface->MarkAsPicked(false); + if constexpr (BOOL_BREAK) { + return false; } - return surfaces; + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; } +} - void ReserveSurface(const SurfaceParams& params, TSurface surface) { - surface_reserve[params].push_back(std::move(surface)); +template +ImageViewId TextureCache

::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { + Image& image = slot_images[image_id]; + if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { + return image_view_id; } + const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); + image.InsertView(info, image_view_id); + return image_view_id; +} + +template +void TextureCache

::RegisterImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), + "Trying to register an already registered image"); + image.flags |= ImageFlagBits::Registered; + ForEachPage(image.cpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { page_table[page].push_back(image_id); }); +} - TSurface TryGetReservedSurface(const SurfaceParams& params) { - auto search{surface_reserve.find(params)}; - if (search == surface_reserve.end()) { - return {}; +template +void TextureCache

::UnregisterImage(ImageId image_id) { + Image& image = slot_images[image_id]; + ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), + "Trying to unregister an already registered image"); + image.flags &= ~ImageFlagBits::Registered; + ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_SHIFT); + return; } - for (auto& surface : search->second) { - if (!surface->IsRegistered()) { - return surface; - } + std::vector& image_ids = page_it->second; + const auto vector_it = std::ranges::find(image_ids, image_id); + if (vector_it == image_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_SHIFT); + return; } - return {}; - } + image_ids.erase(vector_it); + }); +} - /// Try to do an image copy logging when formats are incompatible. - void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) { - const SurfaceParams& src_params = src->GetSurfaceParams(); - const SurfaceParams& dst_params = dst->GetSurfaceParams(); - if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { - LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", dst_params.pixel_format, - src_params.pixel_format); - return; +template +void TextureCache

::TrackImage(ImageBase& image) { + ASSERT(False(image.flags & ImageFlagBits::Tracked)); + image.flags |= ImageFlagBits::Tracked; + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); +} + +template +void TextureCache

::UntrackImage(ImageBase& image) { + ASSERT(True(image.flags & ImageFlagBits::Tracked)); + image.flags &= ~ImageFlagBits::Tracked; + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); +} + +template +void TextureCache

::DeleteImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + const GPUVAddr gpu_addr = image.gpu_addr; + const auto alloc_it = image_allocs_table.find(gpu_addr); + if (alloc_it == image_allocs_table.end()) { + UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", + gpu_addr); + return; + } + const ImageAllocId alloc_id = alloc_it->second; + std::vector& alloc_images = slot_image_allocs[alloc_id].images; + const auto alloc_image_it = std::ranges::find(alloc_images, image_id); + if (alloc_image_it == alloc_images.end()) { + UNREACHABLE_MSG("Trying to delete an image that does not exist"); + return; + } + ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); + + // Mark render targets as dirty + auto& dirty = maxwell3d.dirty.flags; + dirty[Dirty::RenderTargets] = true; + dirty[Dirty::ZetaBuffer] = true; + for (size_t rt = 0; rt < NUM_RT; ++rt) { + dirty[Dirty::ColorBuffer0 + rt] = true; + } + const std::span image_view_ids = image.image_view_ids; + for (const ImageViewId image_view_id : image_view_ids) { + std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); + if (render_targets.depth_buffer_id == image_view_id) { + render_targets.depth_buffer_id = ImageViewId{}; } - ImageCopy(src, dst, copy); } + RemoveImageViewReferences(image_view_ids); + RemoveFramebuffers(image_view_ids); + + for (const AliasedImage& alias : image.aliased_images) { + ImageBase& other_image = slot_images[alias.id]; + [[maybe_unused]] const size_t num_removed_aliases = + std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { + return other_alias.id == image_id; + }); + ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", + num_removed_aliases); + } + for (const ImageViewId image_view_id : image_view_ids) { + 
sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + slot_image_views.erase(image_view_id); + } + sentenced_images.Push(std::move(slot_images[image_id])); + slot_images.erase(image_id); - constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { - return siblings_table[static_cast(format)]; + alloc_images.erase(alloc_image_it); + if (alloc_images.empty()) { + image_allocs_table.erase(alloc_it); } + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + } + graphics_image_table.Invalidate(); + compute_image_table.Invalidate(); + has_deleted_images = true; +} - /// Returns true the shader sampler entry is compatible with the TIC texture type. - static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type, - const VideoCommon::Shader::Sampler& entry) { - const auto shader_type = entry.type; - switch (tic_type) { - case Tegra::Texture::TextureType::Texture1D: - case Tegra::Texture::TextureType::Texture1DArray: - return shader_type == Tegra::Shader::TextureType::Texture1D; - case Tegra::Texture::TextureType::Texture1DBuffer: - // TODO(Rodrigo): Assume as valid for now - return true; - case Tegra::Texture::TextureType::Texture2D: - case Tegra::Texture::TextureType::Texture2DNoMipmap: - return shader_type == Tegra::Shader::TextureType::Texture2D; - case Tegra::Texture::TextureType::Texture2DArray: - return shader_type == Tegra::Shader::TextureType::Texture2D || - shader_type == Tegra::Shader::TextureType::TextureCube; - case Tegra::Texture::TextureType::Texture3D: - return shader_type == Tegra::Shader::TextureType::Texture3D; - case Tegra::Texture::TextureType::TextureCubeArray: - case Tegra::Texture::TextureType::TextureCubemap: - if (shader_type == Tegra::Shader::TextureType::TextureCube) { - return true; - } - return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array; +template +void TextureCache

::RemoveImageViewReferences(std::span removed_views) { + auto it = image_views.begin(); + while (it != image_views.end()) { + const auto found = std::ranges::find(removed_views, it->second); + if (found != removed_views.end()) { + it = image_views.erase(it); + } else { + ++it; } - UNREACHABLE(); - return true; } +} - struct FramebufferTargetInfo { - TSurface target; - TView view; - }; - - void AsyncFlushSurface(TSurface& surface) { - if (!uncommitted_flushes) { - uncommitted_flushes = std::make_shared>(); +template +void TextureCache

::RemoveFramebuffers(std::span removed_views) { + auto it = framebuffers.begin(); + while (it != framebuffers.end()) { + if (it->first.Contains(removed_views)) { + it = framebuffers.erase(it); + } else { + ++it; } - uncommitted_flushes->push_back(surface); } +} - VideoCore::RasterizerInterface& rasterizer; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::MemoryManager& gpu_memory; - - FormatLookupTable format_lookup_table; - FormatCompatibility format_compatibility; - - u64 ticks{}; - - // Guards the cache for protection conflicts. - bool guard_render_targets{}; - bool guard_samplers{}; - - // The siblings table is for formats that can inter exchange with one another - // without causing issues. This is only valid when a conflict occurs on a non - // rendering use. - std::array(PixelFormat::Max)> siblings_table; - - // The internal Cache is different for the Texture Cache. It's based on buckets - // of 1MB. This fits better for the purpose of this cache as textures are normaly - // large in size. - static constexpr u64 registry_page_bits{20}; - static constexpr u64 registry_page_size{1 << registry_page_bits}; - std::unordered_map> registry; +template +void TextureCache

::MarkModification(ImageBase& image) noexcept { + image.flags |= ImageFlagBits::GpuModified; + image.modification_tick = ++modification_tick; +} - static constexpr u32 DEPTH_RT = 8; - static constexpr u32 NO_RT = 0xFFFFFFFF; +template +void TextureCache

::SynchronizeAliases(ImageId image_id) { + boost::container::small_vector aliased_images; + ImageBase& image = slot_images[image_id]; + u64 most_recent_tick = image.modification_tick; + for (const AliasedImage& aliased : image.aliased_images) { + ImageBase& aliased_image = slot_images[aliased.id]; + if (image.modification_tick < aliased_image.modification_tick) { + most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); + aliased_images.push_back(&aliased); + } + } + if (aliased_images.empty()) { + return; + } + image.modification_tick = most_recent_tick; + std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { + const ImageBase& lhs_image = slot_images[lhs->id]; + const ImageBase& rhs_image = slot_images[rhs->id]; + return lhs_image.modification_tick < rhs_image.modification_tick; + }); + for (const AliasedImage* const aliased : aliased_images) { + CopyImage(image_id, aliased->id, aliased->copies); + } +} - // The L1 Cache is used for fast texture lookup before checking the overlaps - // This avoids calculating size and other stuffs. - std::unordered_map l1_cache; +template +void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { + Image& image = slot_images[image_id]; + if (invalidate) { + image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); + if (False(image.flags & ImageFlagBits::Tracked)) { + TrackImage(image); + } + } else { + RefreshContents(image); + SynchronizeAliases(image_id); + } + if (is_modification) { + MarkModification(image); + } + image.frame_tick = frame_tick; +} - /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have - /// previously been used. This is to prevent surfaces from being constantly created and - /// destroyed when used with different surface parameters. - std::unordered_map> surface_reserve; - std::array - render_targets; - FramebufferTargetInfo depth_buffer; +template +void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modification, + bool invalidate) { + if (!image_view_id) { + return; + } + const ImageViewBase& image_view = slot_image_views[image_view_id]; + PrepareImage(image_view.image_id, is_modification, invalidate); +} - std::vector sampled_textures; +template +void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::span copies) { + Image& dst = slot_images[dst_id]; + Image& src = slot_images[src_id]; + const auto dst_format_type = GetFormatType(dst.info.format); + const auto src_format_type = GetFormatType(src.info.format); + if (src_format_type == dst_format_type) { + if constexpr (HAS_EMULATED_COPIES) { + if (!runtime.CanImageBeCopied(dst, src)) { + return runtime.EmulateCopyImage(dst, src, copies); + } + } + return runtime.CopyImage(dst, src, copies); + } + UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); + UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); + for (const ImageCopy& copy : copies) { + UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); + UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); + + const SubresourceBase dst_base{ + .level = copy.dst_subresource.base_level, + .layer = copy.dst_subresource.base_layer, + }; + const SubresourceBase src_base{ + .level = copy.src_subresource.base_level, + .layer = copy.src_subresource.base_layer, + }; + const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; + const SubresourceExtent src_extent{.levels = 1, .layers = 1}; + const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; + const SubresourceRange src_range{.base = src_base, .extent = src_extent}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); + const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + [[maybe_unused]] const Extent3D expected_size{ + .width = 
std::min(dst_view.size.width, src_view.size.width), + .height = std::min(dst_view.size.height, src_view.size.height), + .depth = std::min(dst_view.size.depth, src_view.size.depth), + }; + UNIMPLEMENTED_IF(copy.extent != expected_size); - /// This cache stores null surfaces in order to be used as a placeholder - /// for invalid texture calls. - std::unordered_map invalid_cache; - std::vector invalid_memory; + runtime.ConvertImage(dst_framebuffer, dst_view, src_view); + } +} - std::list marked_for_unregister; +template +void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { + if (*old_id == new_id) { + return; + } + if (*old_id) { + const ImageViewBase& old_view = slot_image_views[*old_id]; + if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { + uncommitted_downloads.push_back(old_view.image_id); + } + } + *old_id = new_id; +} - std::shared_ptr> uncommitted_flushes{}; - std::list>> committed_flushes; +template +std::pair TextureCache

::RenderTargetFromImage( + ImageId image_id, const ImageViewInfo& view_info) { + const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); + const ImageBase& image = slot_images[image_id]; + const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; + const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; + const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; + const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); + const u32 num_samples = image.info.num_samples; + const auto [samples_x, samples_y] = SamplesLog2(num_samples); + const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ + .color_buffer_ids = {color_view_id}, + .depth_buffer_id = depth_view_id, + .size = {extent.width >> samples_x, extent.height >> samples_y}, + }); + return {framebuffer_id, view_id}; +} - StagingCache staging_cache; - std::recursive_mutex mutex; -}; +template +bool TextureCache

::IsFullClear(ImageViewId id) { + if (!id) { + return true; + } + const ImageViewBase& image_view = slot_image_views[id]; + const ImageBase& image = slot_images[image_view.image_id]; + const Extent3D size = image_view.size; + const auto& regs = maxwell3d.regs; + const auto& scissor = regs.scissor_test[0]; + if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { + // Images with multiple resources can't be cleared in a single call + return false; + } + if (regs.clear_flags.scissor == 0) { + // If scissor testing is disabled, the clear is always full + return true; + } + // Make sure the clear covers all texels in the subresource + return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && + scissor.max_y >= size.height; +} } // namespace VideoCommon diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h new file mode 100644 index 000000000..2ad2d72a6 --- /dev/null +++ b/src/video_core/texture_cache/types.h @@ -0,0 +1,140 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "video_core/texture_cache/slot_vector.h" + +namespace VideoCommon { + +constexpr size_t NUM_RT = 8; +constexpr size_t MAX_MIP_LEVELS = 14; + +constexpr SlotId CORRUPT_ID{0xfffffffe}; + +using ImageId = SlotId; +using ImageViewId = SlotId; +using ImageAllocId = SlotId; +using SamplerId = SlotId; +using FramebufferId = SlotId; + +enum class ImageType : u32 { + e1D, + e2D, + e3D, + Linear, + Buffer, +}; + +enum class ImageViewType : u32 { + e1D, + e2D, + Cube, + e3D, + e1DArray, + e2DArray, + CubeArray, + Rect, + Buffer, +}; +constexpr size_t NUM_IMAGE_VIEW_TYPES = 9; + +enum class RelaxedOptions : u32 { + Size = 1 << 0, + Format = 1 << 1, + Samples = 1 << 2, +}; +DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) + +struct Offset2D { + constexpr auto operator<=>(const Offset2D&) const noexcept = default; + + s32 x; + s32 y; +}; + +struct Offset3D { + constexpr auto operator<=>(const Offset3D&) const noexcept = default; + + s32 x; + s32 y; + s32 z; +}; + +struct Extent2D { + constexpr auto operator<=>(const Extent2D&) const noexcept = default; + + u32 width; + u32 height; +}; + +struct Extent3D { + constexpr auto operator<=>(const Extent3D&) const noexcept = default; + + u32 width; + u32 height; + u32 depth; +}; + +struct SubresourceLayers { + s32 base_level = 0; + s32 base_layer = 0; + s32 num_layers = 1; +}; + +struct SubresourceBase { + constexpr auto operator<=>(const SubresourceBase&) const noexcept = default; + + s32 level = 0; + s32 layer = 0; +}; + +struct SubresourceExtent { + constexpr auto operator<=>(const SubresourceExtent&) const noexcept = default; + + s32 levels = 1; + s32 layers = 1; +}; + +struct SubresourceRange { + constexpr auto operator<=>(const SubresourceRange&) const noexcept = default; + + SubresourceBase base; + SubresourceExtent extent; +}; + +struct ImageCopy { + SubresourceLayers src_subresource; + SubresourceLayers dst_subresource; + Offset3D 
src_offset; + Offset3D dst_offset; + Extent3D extent; +}; + +struct BufferImageCopy { + size_t buffer_offset; + size_t buffer_size; + u32 buffer_row_length; + u32 buffer_image_height; + SubresourceLayers image_subresource; + Offset3D image_offset; + Extent3D image_extent; +}; + +struct BufferCopy { + size_t src_offset; + size_t dst_offset; + size_t size; +}; + +struct SwizzleParameters { + Extent3D num_tiles; + Extent3D block; + size_t buffer_offset; + s32 level; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp new file mode 100644 index 000000000..9ed1fc007 --- /dev/null +++ b/src/video_core/texture_cache/util.cpp @@ -0,0 +1,1232 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This files contains code from Ryujinx +// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx +// The sections using code from Ryujinx are marked with a link to the original version + +// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// + +#include +#include +#include +#include +#include +#include + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "video_core/compatible_formats.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/decode_bc4.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/samples_helper.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/astc.h" +#include "video_core/textures/decoders.h" + +namespace VideoCommon { + +namespace { + +using Tegra::Texture::GOB_SIZE; +using Tegra::Texture::GOB_SIZE_SHIFT; +using Tegra::Texture::GOB_SIZE_X; +using Tegra::Texture::GOB_SIZE_X_SHIFT; +using Tegra::Texture::GOB_SIZE_Y; +using Tegra::Texture::GOB_SIZE_Y_SHIFT; +using Tegra::Texture::GOB_SIZE_Z; +using Tegra::Texture::GOB_SIZE_Z_SHIFT; +using Tegra::Texture::MsaaMode; +using Tegra::Texture::SwizzleTexture; +using Tegra::Texture::TextureFormat; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::UnswizzleTexture; +using VideoCore::Surface::BytesPerBlock; +using VideoCore::Surface::DefaultBlockHeight; +using VideoCore::Surface::DefaultBlockWidth; +using VideoCore::Surface::IsCopyCompatible; +using VideoCore::Surface::IsPixelFormatASTC; +using VideoCore::Surface::IsViewCompatible; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::SurfaceType; + +constexpr u32 
CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); + +struct LevelInfo { + Extent3D size; + Extent3D block; + Extent2D tile_size; + u32 bpp_log2; + u32 tile_width_spacing; +}; + +[[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) { + if (shift == 0) { + return 0; + } + u32 x = unit_factor << (shift - 1); + if (x >= dimension) { + while (--shift) { + x >>= 1; + if (x < dimension) { + break; + } + } + } + return shift; +} + +[[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) { + return std::max(size >> level, 1); +} + +[[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) { + return Extent3D{ + .width = AdjustMipSize(size.width, level), + .height = AdjustMipSize(size.height, level), + .depth = AdjustMipSize(size.depth, level), + }; +} + +[[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples) { + const auto [samples_x, samples_y] = SamplesLog2(num_samples); + return Extent3D{ + .width = size.width >> samples_x, + .height = size.height >> samples_y, + .depth = size.depth, + }; +} + +template +[[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) { + do { + while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) { + --block_size; + } + } while (level--); + return block_size; +} + +[[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size, + u32 level) { + return { + .width = AdjustMipBlockSize(num_tiles.width, block_size.width, level), + .height = AdjustMipBlockSize(num_tiles.height, block_size.height, level), + .depth = AdjustMipBlockSize(num_tiles.depth, block_size.depth, level), + }; +} + +[[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) { + return { + .width = Common::DivCeil(size.width, tile_size.width), + .height = Common::DivCeil(size.height, tile_size.height), + .depth = size.depth, + }; +} + +[[nodiscard]] constexpr u32 
BytesPerBlockLog2(u32 bytes_per_block) { + return std::countl_zero(bytes_per_block) ^ 0x1F; +} + +[[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) { + return BytesPerBlockLog2(BytesPerBlock(format)); +} + +[[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) { + const Extent3D num_blocks = AdjustTileSize(size, tile_size); + return num_blocks.width * num_blocks.height * num_blocks.depth; +} + +[[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) { + return Common::DivCeil(AdjustMipSize(size, level), block_size); +} + +[[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) { + return config.Width() * config.Height() * BytesPerBlock(format); +} + +[[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) { + switch (type) { + case TextureType::Texture2D: + case TextureType::Texture2DArray: + case TextureType::Texture2DNoMipmap: + case TextureType::Texture3D: + case TextureType::TextureCubeArray: + case TextureType::TextureCubemap: + return true; + case TextureType::Texture1D: + case TextureType::Texture1DArray: + case TextureType::Texture1DBuffer: + return false; + } + return false; +} + +[[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) { + switch (type) { + case ImageType::e2D: + case ImageType::e3D: + case ImageType::Linear: + return true; + case ImageType::e1D: + case ImageType::Buffer: + return false; + } + UNREACHABLE_MSG("Invalid image type={}", static_cast(type)); +} + +[[nodiscard]] constexpr std::pair Samples(int num_samples) { + switch (num_samples) { + case 1: + return {1, 1}; + case 2: + return {2, 1}; + case 4: + return {2, 2}; + case 8: + return {4, 2}; + case 16: + return {4, 4}; + } + UNREACHABLE_MSG("Invalid number of samples={}", num_samples); + return {1, 1}; +} + +[[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) { + return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; +} + +[[nodiscard]] constexpr Extent3D 
NumLevelBlocks(const LevelInfo& info, u32 level) { + return Extent3D{ + .width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2, + .height = AdjustSize(info.size.height, level, info.tile_size.height), + .depth = AdjustMipSize(info.size.depth, level), + }; +} + +[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) { + const Extent3D blocks = NumLevelBlocks(info, level); + return Extent3D{ + .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width), + .height = AdjustTileSize(info.block.height, GOB_SIZE_Y, blocks.height), + .depth = AdjustTileSize(info.block.depth, GOB_SIZE_Z, blocks.depth), + }; +} + +[[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) { + return Extent2D{ + .width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing, + .height = GOB_SIZE_Y_SHIFT + block_height, + }; +} + +[[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob, + u32 block_depth) { + return num_tiles.width <= (1U << gob.width) || num_tiles.height <= (1U << gob.height) || + num_tiles.depth < (1U << block_depth); +} + +[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob, + u32 bpp_log2) { + if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) { + return GOB_SIZE_X_SHIFT - bpp_log2; + } else { + return gob.width; + } +} + +[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2, + u32 tile_width_spacing) { + const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing); + return StrideAlignment(num_tiles, block, gob, bpp_log2); +} + +[[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) { + const Extent3D blocks = NumLevelBlocks(info, level); + const Extent2D gobs{ + .width = Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT), + .height = Common::DivCeilLog2(blocks.height, GOB_SIZE_Y_SHIFT), + }; + const Extent2D gob = GobSize(info.bpp_log2, 
info.block.height, info.tile_width_spacing); + const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth); + const u32 alignment = is_small ? 0 : info.tile_width_spacing; + return Extent2D{ + .width = Common::AlignBits(gobs.width, alignment), + .height = gobs.height, + }; +} + +[[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) { + const Extent3D blocks = NumLevelBlocks(info, level); + const Extent3D tile_shift = TileShift(info, level); + const Extent2D gobs = NumGobs(info, level); + return Extent3D{ + .width = Common::DivCeilLog2(gobs.width, tile_shift.width), + .height = Common::DivCeilLog2(gobs.height, tile_shift.height), + .depth = Common::DivCeilLog2(blocks.depth, tile_shift.depth), + }; +} + +[[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) { + const Extent3D tile_shift = TileShift(info, level); + const Extent3D tiles = LevelTiles(info, level); + const u32 num_tiles = tiles.width * tiles.height * tiles.depth; + const u32 shift = GOB_SIZE_SHIFT + tile_shift.width + tile_shift.height + tile_shift.depth; + return num_tiles << shift; +} + +[[nodiscard]] constexpr std::array CalculateLevelSizes(const LevelInfo& info, + u32 num_levels) { + ASSERT(num_levels <= MAX_MIP_LEVELS); + std::array sizes{}; + for (u32 level = 0; level < num_levels; ++level) { + sizes[level] = CalculateLevelSize(info, level); + } + return sizes; +} + +[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block, + u32 num_samples, u32 tile_width_spacing) { + const auto [samples_x, samples_y] = Samples(num_samples); + const u32 bytes_per_block = BytesPerBlock(format); + return { + .size = + { + .width = size.width * samples_x, + .height = size.height * samples_y, + .depth = size.depth, + }, + .block = block, + .tile_size = DefaultBlockSize(format), + .bpp_log2 = BytesPerBlockLog2(bytes_per_block), + .tile_width_spacing = tile_width_spacing, + }; +} + +[[nodiscard]] constexpr 
LevelInfo MakeLevelInfo(const ImageInfo& info) { + return MakeLevelInfo(info.format, info.size, info.block, info.num_samples, + info.tile_width_spacing); +} + +[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block, + u32 num_samples, u32 tile_width_spacing, + u32 level) { + const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing); + u32 offset = 0; + for (u32 current_level = 0; current_level < level; ++current_level) { + offset += CalculateLevelSize(info, current_level); + } + return offset; +} + +[[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block, + u32 tile_size_y, u32 tile_width_spacing) { + // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134 + if (tile_width_spacing > 0) { + const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth; + return Common::AlignBits(size_bytes, alignment_log2); + } + const u32 aligned_height = Common::AlignUp(size.height, tile_size_y); + while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) { + --block.height; + } + while (block.depth != 0 && size.depth <= (1U << (block.depth - 1))) { + --block.depth; + } + const u32 block_shift = GOB_SIZE_SHIFT + block.height + block.depth; + const u32 num_blocks = size_bytes >> block_shift; + if (size_bytes != num_blocks << block_shift) { + return (num_blocks + 1) << block_shift; + } + return size_bytes; +} + +[[nodiscard]] std::optional ResolveOverlapEqualAddress(const ImageInfo& new_info, + const ImageBase& overlap, + bool strict_size) { + const ImageInfo& info = overlap.info; + if (!IsBlockLinearSizeCompatible(new_info, info, 0, 0, strict_size)) { + return std::nullopt; + } + if (new_info.block != info.block) { + return std::nullopt; + } + const SubresourceExtent resources = new_info.resources; + return SubresourceExtent{ + .levels = 
std::max(resources.levels, info.resources.levels), + .layers = std::max(resources.layers, info.resources.layers), + }; +} + +[[nodiscard]] std::optional ResolveOverlapRightAddress3D( + const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { + const std::vector slice_offsets = CalculateSliceOffsets(new_info); + const u32 diff = static_cast(overlap.gpu_addr - gpu_addr); + const auto it = std::ranges::find(slice_offsets, diff); + if (it == slice_offsets.end()) { + return std::nullopt; + } + const std::vector subresources = CalculateSliceSubresources(new_info); + const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; + const ImageInfo& info = overlap.info; + if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { + return std::nullopt; + } + const u32 mip_depth = std::max(1U, new_info.size.depth << base.level); + if (mip_depth < info.size.depth + base.layer) { + return std::nullopt; + } + if (MipBlockSize(new_info, base.level) != info.block) { + return std::nullopt; + } + return SubresourceExtent{ + .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), + .layers = 1, + }; +} + +[[nodiscard]] std::optional ResolveOverlapRightAddress2D( + const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { + const u32 layer_stride = new_info.layer_stride; + const s32 new_size = layer_stride * new_info.resources.layers; + const s32 diff = static_cast(overlap.gpu_addr - gpu_addr); + if (diff > new_size) { + return std::nullopt; + } + const s32 base_layer = diff / layer_stride; + const s32 mip_offset = diff % layer_stride; + const std::array offsets = CalculateMipLevelOffsets(new_info); + const auto end = offsets.begin() + new_info.resources.levels; + const auto it = std::find(offsets.begin(), end, mip_offset); + if (it == end) { + // Mipmap is not aligned to any valid size + return std::nullopt; + } + const SubresourceBase base{ + 
.level = static_cast(std::distance(offsets.begin(), it)), + .layer = base_layer, + }; + const ImageInfo& info = overlap.info; + if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { + return std::nullopt; + } + if (MipBlockSize(new_info, base.level) != info.block) { + return std::nullopt; + } + return SubresourceExtent{ + .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), + .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer), + }; +} + +[[nodiscard]] std::optional ResolveOverlapRightAddress(const ImageInfo& new_info, + GPUVAddr gpu_addr, + VAddr cpu_addr, + const ImageBase& overlap, + bool strict_size) { + std::optional resources; + if (new_info.type != ImageType::e3D) { + resources = ResolveOverlapRightAddress2D(new_info, gpu_addr, overlap, strict_size); + } else { + resources = ResolveOverlapRightAddress3D(new_info, gpu_addr, overlap, strict_size); + } + if (!resources) { + return std::nullopt; + } + return OverlapResult{ + .gpu_addr = gpu_addr, + .cpu_addr = cpu_addr, + .resources = *resources, + }; +} + +[[nodiscard]] std::optional ResolveOverlapLeftAddress(const ImageInfo& new_info, + GPUVAddr gpu_addr, + VAddr cpu_addr, + const ImageBase& overlap, + bool strict_size) { + const std::optional base = overlap.TryFindBase(gpu_addr); + if (!base) { + return std::nullopt; + } + const ImageInfo& info = overlap.info; + if (!IsBlockLinearSizeCompatible(new_info, info, base->level, 0, strict_size)) { + return std::nullopt; + } + if (new_info.block != MipBlockSize(info, base->level)) { + return std::nullopt; + } + const SubresourceExtent resources = new_info.resources; + s32 layers = 1; + if (info.type != ImageType::e3D) { + layers = std::max(resources.layers, info.resources.layers + base->layer); + } + return OverlapResult{ + .gpu_addr = overlap.gpu_addr, + .cpu_addr = overlap.cpu_addr, + .resources = + { + .levels = std::max(resources.levels + base->level, info.resources.levels), + 
.layers = layers, + }, + }; +} + +[[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info) { + // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L212 + static constexpr u32 STRIDE_ALIGNMENT = 32; + ASSERT(info.type == ImageType::Linear); + const Extent2D num_tiles{ + .width = Common::DivCeil(info.size.width, DefaultBlockWidth(info.format)), + .height = Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)), + }; + const u32 width_alignment = STRIDE_ALIGNMENT / BytesPerBlock(info.format); + return Extent2D{ + .width = Common::AlignUp(num_tiles.width, width_alignment), + .height = num_tiles.height, + }; +} + +[[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level) { + // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L176 + ASSERT(info.type != ImageType::Linear); + const Extent3D size = AdjustMipSize(info.size, level); + const Extent3D num_tiles{ + .width = Common::DivCeil(size.width, DefaultBlockWidth(info.format)), + .height = Common::DivCeil(size.height, DefaultBlockHeight(info.format)), + .depth = size.depth, + }; + const u32 bpp_log2 = BytesPerBlockLog2(info.format); + const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing); + const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0); + return Extent3D{ + .width = Common::AlignBits(num_tiles.width, alignment), + .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height), + .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth), + }; +} + +[[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept { + u32 num_blocks = 0; + for (s32 level = 0; level < info.resources.levels; ++level) { + const Extent3D mip_size = AdjustMipSize(info.size, level); + num_blocks += 
NumBlocks(mip_size, tile_size); + } + return num_blocks; +} + +[[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept { + ASSERT(info.type == ImageType::e3D); + u32 num_slices = 0; + for (s32 level = 0; level < info.resources.levels; ++level) { + num_slices += AdjustMipSize(info.size.depth, level); + } + return num_slices; +} + +void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageInfo& info, const BufferImageCopy& copy, + std::span memory) { + ASSERT(copy.image_offset.z == 0); + ASSERT(copy.image_extent.depth == 1); + ASSERT(copy.image_subresource.base_level == 0); + ASSERT(copy.image_subresource.base_layer == 0); + ASSERT(copy.image_subresource.num_layers == 1); + + const u32 bytes_per_block = BytesPerBlock(info.format); + const u32 row_length = copy.image_extent.width * bytes_per_block; + const u32 guest_offset_x = copy.image_offset.x * bytes_per_block; + + for (u32 line = 0; line < copy.image_extent.height; ++line) { + const u32 host_offset_y = line * info.pitch; + const u32 guest_offset_y = (copy.image_offset.y + line) * info.pitch; + const u32 guest_offset = guest_offset_x + guest_offset_y; + gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, memory.data() + host_offset_y, + row_length); + } +} + +void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageInfo& info, const BufferImageCopy& copy, + std::span input) { + const Extent3D size = info.size; + const LevelInfo level_info = MakeLevelInfo(info); + const Extent2D tile_size = DefaultBlockSize(info.format); + const u32 bytes_per_block = BytesPerBlock(info.format); + + const s32 level = copy.image_subresource.base_level; + const Extent3D level_size = AdjustMipSize(size, level); + const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); + const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block; + + UNIMPLEMENTED_IF(info.tile_width_spacing > 0); + + UNIMPLEMENTED_IF(copy.image_offset.x != 0); + 
UNIMPLEMENTED_IF(copy.image_offset.y != 0); + UNIMPLEMENTED_IF(copy.image_offset.z != 0); + UNIMPLEMENTED_IF(copy.image_extent != level_size); + + const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); + const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + + size_t host_offset = copy.buffer_offset; + + const u32 num_levels = info.resources.levels; + const std::array sizes = CalculateLevelSizes(level_info, num_levels); + size_t guest_offset = std::reduce(sizes.begin(), sizes.begin() + level, 0); + const size_t layer_stride = + AlignLayerSize(std::reduce(sizes.begin(), sizes.begin() + num_levels, 0), size, + level_info.block, tile_size.height, info.tile_width_spacing); + const size_t subresource_size = sizes[level]; + + const auto dst_data = std::make_unique(subresource_size); + const std::span dst(dst_data.get(), subresource_size); + + for (s32 layer = 0; layer < info.resources.layers; ++layer) { + const std::span src = input.subspan(host_offset); + SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, + num_tiles.depth, block.height, block.depth); + + gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); + + host_offset += host_bytes_per_layer; + guest_offset += layer_stride; + } + ASSERT(host_offset - copy.buffer_offset == copy.buffer_size); +} + +} // Anonymous namespace + +u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept { + if (info.type == ImageType::Buffer) { + return info.size.width * BytesPerBlock(info.format); + } + if (info.type == ImageType::Linear) { + return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); + } + if (info.resources.layers > 1) { + ASSERT(info.layer_stride != 0); + return info.layer_stride * info.resources.layers; + } else { + return CalculateLayerSize(info); + } +} + +u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept { + if (info.type == ImageType::Buffer) { + return info.size.width * 
BytesPerBlock(info.format); + } + if (info.num_samples > 1) { + // Multisample images can't be uploaded or downloaded to the host + return 0; + } + if (info.type == ImageType::Linear) { + return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); + } + const Extent2D tile_size = DefaultBlockSize(info.format); + return NumBlocksPerLayer(info, tile_size) * info.resources.layers * BytesPerBlock(info.format); +} + +u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept { + if (info.type == ImageType::Buffer) { + return info.size.width * BytesPerBlock(info.format); + } + static constexpr Extent2D TILE_SIZE{1, 1}; + return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; +} + +u32 CalculateLayerStride(const ImageInfo& info) noexcept { + ASSERT(info.type != ImageType::Linear); + const u32 layer_size = CalculateLayerSize(info); + const Extent3D size = info.size; + const Extent3D block = info.block; + const u32 tile_size_y = DefaultBlockHeight(info.format); + return AlignLayerSize(layer_size, size, block, tile_size_y, info.tile_width_spacing); +} + +u32 CalculateLayerSize(const ImageInfo& info) noexcept { + ASSERT(info.type != ImageType::Linear); + return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples, + info.tile_width_spacing, info.resources.levels); +} + +std::array CalculateMipLevelOffsets(const ImageInfo& info) noexcept { + ASSERT(info.resources.levels <= MAX_MIP_LEVELS); + const LevelInfo level_info = MakeLevelInfo(info); + std::array offsets{}; + u32 offset = 0; + for (s32 level = 0; level < info.resources.levels; ++level) { + offsets[level] = offset; + offset += CalculateLevelSize(level_info, level); + } + return offsets; +} + +std::vector CalculateSliceOffsets(const ImageInfo& info) { + ASSERT(info.type == ImageType::e3D); + std::vector offsets; + offsets.reserve(NumSlices(info)); + + const LevelInfo level_info = MakeLevelInfo(info); + u32 mip_offset = 0; + for 
(s32 level = 0; level < info.resources.levels; ++level) { + const Extent3D tile_shift = TileShift(level_info, level); + const Extent3D tiles = LevelTiles(level_info, level); + const u32 gob_size_shift = tile_shift.height + GOB_SIZE_SHIFT; + const u32 slice_size = (tiles.width * tiles.height) << gob_size_shift; + const u32 z_mask = (1U << tile_shift.depth) - 1; + const u32 depth = AdjustMipSize(info.size.depth, level); + for (u32 slice = 0; slice < depth; ++slice) { + const u32 z_low = slice & z_mask; + const u32 z_high = slice & ~z_mask; + offsets.push_back(mip_offset + (z_low << gob_size_shift) + (z_high * slice_size)); + } + mip_offset += CalculateLevelSize(level_info, level); + } + return offsets; +} + +std::vector CalculateSliceSubresources(const ImageInfo& info) { + ASSERT(info.type == ImageType::e3D); + std::vector subresources; + subresources.reserve(NumSlices(info)); + for (s32 level = 0; level < info.resources.levels; ++level) { + const s32 depth = AdjustMipSize(info.size.depth, level); + for (s32 slice = 0; slice < depth; ++slice) { + subresources.emplace_back(SubresourceBase{ + .level = level, + .layer = slice, + }); + } + } + return subresources; +} + +u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) { + const Extent2D tile_size = DefaultBlockSize(info.format); + const Extent3D level_size = AdjustMipSize(info.size, level); + const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); + const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level); + const u32 bpp_log2 = BytesPerBlockLog2(info.format); + return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing); +} + +PixelFormat PixelFormatFromTIC(const TICEntry& config) noexcept { + return PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, + config.a_type, config.srgb_conversion); +} + +ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { + switch (info.type) { + case ImageType::e2D: + return 
info.resources.layers > 1 ? ImageViewType::e2DArray : ImageViewType::e2D; + case ImageType::e3D: + return ImageViewType::e2DArray; + case ImageType::Linear: + return ImageViewType::e2D; + default: + UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast(info.type)); + return ImageViewType{}; + } +} + +std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, + SubresourceBase base) { + ASSERT(dst.resources.levels >= src.resources.levels); + ASSERT(dst.num_samples == src.num_samples); + + const bool is_dst_3d = dst.type == ImageType::e3D; + if (is_dst_3d) { + ASSERT(src.type == ImageType::e3D); + ASSERT(src.resources.levels == 1); + } + + std::vector copies; + copies.reserve(src.resources.levels); + for (s32 level = 0; level < src.resources.levels; ++level) { + ImageCopy& copy = copies.emplace_back(); + copy.src_subresource = SubresourceLayers{ + .base_level = level, + .base_layer = 0, + .num_layers = src.resources.layers, + }; + copy.dst_subresource = SubresourceLayers{ + .base_level = base.level + level, + .base_layer = is_dst_3d ? 0 : base.layer, + .num_layers = is_dst_3d ? 1 : src.resources.layers, + }; + copy.src_offset = Offset3D{ + .x = 0, + .y = 0, + .z = 0, + }; + copy.dst_offset = Offset3D{ + .x = 0, + .y = 0, + .z = is_dst_3d ? 
base.layer : 0, + }; + const Extent3D mip_size = AdjustMipSize(dst.size, base.level + level); + copy.extent = AdjustSamplesSize(mip_size, dst.num_samples); + if (is_dst_3d) { + copy.extent.depth = src.size.depth; + } + } + return copies; +} + +bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { + if (config.Address() == 0) { + return false; + } + if (config.Address() > (u64(1) << 48)) { + return false; + } + return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); +} + +std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageInfo& info, std::span output) { + const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); + const u32 bpp_log2 = BytesPerBlockLog2(info.format); + const Extent3D size = info.size; + + if (info.type == ImageType::Linear) { + gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); + + ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); + return {{ + .buffer_offset = 0, + .buffer_size = guest_size_bytes, + .buffer_row_length = info.pitch >> bpp_log2, + .buffer_image_height = size.height, + .image_subresource = + { + .base_level = 0, + .base_layer = 0, + .num_layers = 1, + }, + .image_offset = {0, 0, 0}, + .image_extent = size, + }}; + } + const auto input_data = std::make_unique(guest_size_bytes); + gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes); + const std::span input(input_data.get(), guest_size_bytes); + + const LevelInfo level_info = MakeLevelInfo(info); + const s32 num_layers = info.resources.layers; + const s32 num_levels = info.resources.levels; + const Extent2D tile_size = DefaultBlockSize(info.format); + const std::array level_sizes = CalculateLevelSizes(level_info, num_levels); + const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing); + const u32 layer_size = std::reduce(level_sizes.begin(), level_sizes.begin() + num_levels, 0); + const u32 layer_stride = 
AlignLayerSize(layer_size, size, level_info.block, tile_size.height, + info.tile_width_spacing); + size_t guest_offset = 0; + u32 host_offset = 0; + std::vector copies(num_levels); + + for (s32 level = 0; level < num_levels; ++level) { + const Extent3D level_size = AdjustMipSize(size, level); + const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); + const u32 host_bytes_per_layer = num_blocks_per_layer << bpp_log2; + copies[level] = BufferImageCopy{ + .buffer_offset = host_offset, + .buffer_size = static_cast(host_bytes_per_layer) * num_layers, + .buffer_row_length = Common::AlignUp(level_size.width, tile_size.width), + .buffer_image_height = Common::AlignUp(level_size.height, tile_size.height), + .image_subresource = + { + .base_level = level, + .base_layer = 0, + .num_layers = info.resources.layers, + }, + .image_offset = {0, 0, 0}, + .image_extent = level_size, + }; + const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); + const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2); + size_t guest_layer_offset = 0; + + for (s32 layer = 0; layer < info.resources.layers; ++layer) { + const std::span dst = output.subspan(host_offset); + const std::span src = input.subspan(guest_offset + guest_layer_offset); + UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height, + num_tiles.depth, block.height, block.depth, stride_alignment); + guest_layer_offset += layer_stride; + host_offset += host_bytes_per_layer; + } + guest_offset += level_sizes[level]; + } + return copies; +} + +BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageBase& image, std::span output) { + gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes); + return BufferCopy{ + .src_offset = 0, + .dst_offset = 0, + .size = image.guest_size_bytes, + }; +} + +void ConvertImage(std::span input, const 
ImageInfo& info, std::span output, + std::span copies) { + u32 output_offset = 0; + + const Extent2D tile_size = DefaultBlockSize(info.format); + for (BufferImageCopy& copy : copies) { + const u32 level = copy.image_subresource.base_level; + const Extent3D mip_size = AdjustMipSize(info.size, level); + ASSERT(copy.image_offset == Offset3D{}); + ASSERT(copy.image_subresource.base_layer == 0); + ASSERT(copy.image_extent == mip_size); + ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); + ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); + + if (IsPixelFormatASTC(info.format)) { + ASSERT(copy.image_extent.depth == 1); + Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset), + copy.image_extent.width, copy.image_extent.height, + copy.image_subresource.num_layers, tile_size.width, + tile_size.height, output.subspan(output_offset)); + } else { + DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, + output.subspan(output_offset)); + } + copy.buffer_offset = output_offset; + copy.buffer_row_length = mip_size.width; + copy.buffer_image_height = mip_size.height; + + output_offset += copy.image_extent.width * copy.image_extent.height * + copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; + } +} + +std::vector FullDownloadCopies(const ImageInfo& info) { + const Extent3D size = info.size; + const u32 bytes_per_block = BytesPerBlock(info.format); + if (info.type == ImageType::Linear) { + ASSERT(info.pitch % bytes_per_block == 0); + return {{ + .buffer_offset = 0, + .buffer_size = static_cast(info.pitch) * size.height, + .buffer_row_length = info.pitch / bytes_per_block, + .buffer_image_height = size.height, + .image_subresource = + { + .base_level = 0, + .base_layer = 0, + .num_layers = 1, + }, + .image_offset = {0, 0, 0}, + .image_extent = size, + }}; + } + UNIMPLEMENTED_IF(info.tile_width_spacing > 0); + + const s32 num_layers = info.resources.layers; + const s32 
num_levels = info.resources.levels; + const Extent2D tile_size = DefaultBlockSize(info.format); + + u32 host_offset = 0; + + std::vector copies(num_levels); + for (s32 level = 0; level < num_levels; ++level) { + const Extent3D level_size = AdjustMipSize(size, level); + const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); + const u32 host_bytes_per_level = num_blocks_per_layer * bytes_per_block * num_layers; + copies[level] = BufferImageCopy{ + .buffer_offset = host_offset, + .buffer_size = host_bytes_per_level, + .buffer_row_length = level_size.width, + .buffer_image_height = level_size.height, + .image_subresource = + { + .base_level = level, + .base_layer = 0, + .num_layers = info.resources.layers, + }, + .image_offset = {0, 0, 0}, + .image_extent = level_size, + }; + host_offset += host_bytes_per_level; + } + return copies; +} + +Extent3D MipSize(Extent3D size, u32 level) { + return AdjustMipSize(size, level); +} + +Extent3D MipBlockSize(const ImageInfo& info, u32 level) { + const LevelInfo level_info = MakeLevelInfo(info); + const Extent2D tile_size = DefaultBlockSize(info.format); + const Extent3D level_size = AdjustMipSize(info.size, level); + const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); + return AdjustMipBlockSize(num_tiles, level_info.block, level); +} + +std::vector FullUploadSwizzles(const ImageInfo& info) { + const Extent2D tile_size = DefaultBlockSize(info.format); + if (info.type == ImageType::Linear) { + return std::vector{SwizzleParameters{ + .num_tiles = AdjustTileSize(info.size, tile_size), + .block = {}, + .buffer_offset = 0, + .level = 0, + }}; + } + const LevelInfo level_info = MakeLevelInfo(info); + const Extent3D size = info.size; + const s32 num_levels = info.resources.levels; + + u32 guest_offset = 0; + std::vector params(num_levels); + for (s32 level = 0; level < num_levels; ++level) { + const Extent3D level_size = AdjustMipSize(size, level); + const Extent3D num_tiles = AdjustTileSize(level_size, 
tile_size); + const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + params[level] = SwizzleParameters{ + .num_tiles = num_tiles, + .block = block, + .buffer_offset = guest_offset, + .level = level, + }; + guest_offset += CalculateLevelSize(level_info, level); + } + return params; +} + +void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, + std::span copies, std::span memory) { + const bool is_pitch_linear = info.type == ImageType::Linear; + for (const BufferImageCopy& copy : copies) { + if (is_pitch_linear) { + SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory); + } else { + SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory); + } + } +} + +bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 lhs_level, + u32 rhs_level, bool strict_size) noexcept { + ASSERT(lhs.type != ImageType::Linear); + ASSERT(rhs.type != ImageType::Linear); + if (strict_size) { + const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level); + const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level); + return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height; + } else { + const Extent3D lhs_size = BlockLinearAlignedSize(lhs, lhs_level); + const Extent3D rhs_size = BlockLinearAlignedSize(rhs, rhs_level); + return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height; + } +} + +bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept { + ASSERT(lhs.type == ImageType::Linear); + ASSERT(rhs.type == ImageType::Linear); + if (strict_size) { + return lhs.size.width == rhs.size.width && lhs.size.height == rhs.size.height; + } else { + const Extent2D lhs_size = PitchLinearAlignedSize(lhs); + const Extent2D rhs_size = PitchLinearAlignedSize(rhs); + return lhs_size == rhs_size; + } +} + +std::optional ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, + VAddr cpu_addr, const ImageBase& 
overlap, + bool strict_size) { + ASSERT(new_info.type != ImageType::Linear); + ASSERT(overlap.info.type != ImageType::Linear); + if (!IsLayerStrideCompatible(new_info, overlap.info)) { + return std::nullopt; + } + if (!IsViewCompatible(overlap.info.format, new_info.format)) { + return std::nullopt; + } + if (gpu_addr == overlap.gpu_addr) { + const std::optional solution = ResolveOverlapEqualAddress(new_info, overlap, strict_size); + if (!solution) { + return std::nullopt; + } + return OverlapResult{ + .gpu_addr = gpu_addr, + .cpu_addr = cpu_addr, + .resources = *solution, + }; + } + if (overlap.gpu_addr > gpu_addr) { + return ResolveOverlapRightAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size); + } + // if overlap.gpu_addr < gpu_addr + return ResolveOverlapLeftAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size); +} + +bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) { + // If either of the layer strides is zero, we can assume they are compatible + // These images generally come from rendertargets + if (lhs.layer_stride == 0) { + return true; + } + if (rhs.layer_stride == 0) { + return true; + } + // It's definitely compatible if the layer stride matches + if (lhs.layer_stride == rhs.layer_stride) { + return true; + } + // Although we also have to compare for cases where it can be unaligned + // This can happen if the image doesn't have layers, so the stride is not aligned + if (lhs.maybe_unaligned_layer_stride == rhs.maybe_unaligned_layer_stride) { + return true; + } + return false; +} + +std::optional FindSubresource(const ImageInfo& candidate, const ImageBase& image, + GPUVAddr candidate_addr, RelaxedOptions options) { + const std::optional base = image.TryFindBase(candidate_addr); + if (!base) { + return std::nullopt; + } + const ImageInfo& existing = image.info; + if (False(options & RelaxedOptions::Format)) { + if (!IsViewCompatible(existing.format, candidate.format)) { + return std::nullopt; + } + } + if 
(!IsLayerStrideCompatible(existing, candidate)) { + return std::nullopt; + } + if (existing.type != candidate.type) { + return std::nullopt; + } + if (False(options & RelaxedOptions::Samples)) { + if (existing.num_samples != candidate.num_samples) { + return std::nullopt; + } + } + if (existing.resources.levels < candidate.resources.levels + base->level) { + return std::nullopt; + } + if (existing.type == ImageType::e3D) { + const u32 mip_depth = std::max(1U, existing.size.depth << base->level); + if (mip_depth < candidate.size.depth + base->layer) { + return std::nullopt; + } + } else { + if (existing.resources.layers < candidate.resources.layers + base->layer) { + return std::nullopt; + } + } + const bool strict_size = False(options & RelaxedOptions::Size); + if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) { + return std::nullopt; + } + // TODO: compare block sizes + return base; +} + +bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, + RelaxedOptions options) { + return FindSubresource(candidate, image, candidate_addr, options).has_value(); +} + +void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, + const ImageBase* src) { + if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + src_info.format = src->info.format; + } + if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { + dst_info.format = dst->info.format; + } + if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + dst_info.format = src->info.format; + } + if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { + src_info.format = src->info.format; + } +} + +u32 MapSizeBytes(const ImageBase& image) { + if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { + return image.guest_size_bytes; + } else if (True(image.flags & ImageFlagBits::Converted)) { + return image.converted_size_bytes; + } 
else { + return image.unswizzled_size_bytes; + } +} + +using P = PixelFormat; + +static_assert(CalculateLevelSize(LevelInfo{{1920, 1080}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == 0x7f8000); +static_assert(CalculateLevelSize(LevelInfo{{32, 32}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000); + +static_assert(CalculateLevelOffset(P::R8_SINT, {1920, 1080}, {0, 2}, 1, 0, 7) == 0x2afc00); +static_assert(CalculateLevelOffset(P::ASTC_2D_12X12_UNORM, {8192, 4096}, {0, 2}, 1, 0, 12) == + 0x50d200); + +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 0) == 0); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 1) == 0x400000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 2) == 0x500000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 3) == 0x540000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 4) == 0x550000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 5) == 0x554000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 6) == 0x555000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 7) == 0x555400); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 8) == 0x555600); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 9) == 0x555800); + +constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height, + u32 tile_width_spacing, u32 level) { + const Extent3D size{width, height, 1}; + const Extent3D block{0, block_height, 0}; + const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level); + return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing); +} + +static_assert(ValidateLayerSize(P::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) == 0x50d800); 
+static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000); +static_assert(ValidateLayerSize(P::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000); + +static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000, + "Tile width spacing is not working"); +static_assert(ValidateLayerSize(P::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000, + "Compressed tile width spacing is not working"); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h new file mode 100644 index 000000000..dbbbd33cd --- /dev/null +++ b/src/video_core/texture_cache/util.h @@ -0,0 +1,107 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "common/common_types.h" + +#include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +using Tegra::Texture::TICEntry; + +struct OverlapResult { + GPUVAddr gpu_addr; + VAddr cpu_addr; + SubresourceExtent resources; +}; + +[[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateLayerStride(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateLayerSize(const ImageInfo& info) noexcept; + +[[nodiscard]] std::array CalculateMipLevelOffsets( + const ImageInfo& info) noexcept; + +[[nodiscard]] std::vector CalculateSliceOffsets(const ImageInfo& info); + +[[nodiscard]] std::vector CalculateSliceSubresources(const ImageInfo& info); + +[[nodiscard]] u32 CalculateLevelStrideAlignment(const 
ImageInfo& info, u32 level); + +[[nodiscard]] VideoCore::Surface::PixelFormat PixelFormatFromTIC( + const Tegra::Texture::TICEntry& config) noexcept; + +[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; + +[[nodiscard]] std::vector MakeShrinkImageCopies(const ImageInfo& dst, + const ImageInfo& src, + SubresourceBase base); + +[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); + +[[nodiscard]] std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, + GPUVAddr gpu_addr, const ImageInfo& info, + std::span output); + +[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageBase& image, std::span output); + +void ConvertImage(std::span input, const ImageInfo& info, std::span output, + std::span copies); + +[[nodiscard]] std::vector FullDownloadCopies(const ImageInfo& info); + +[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); + +[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); + +[[nodiscard]] std::vector FullUploadSwizzles(const ImageInfo& info); + +void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, + std::span copies, std::span memory); + +[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info, + const ImageInfo& overlap_info, u32 new_level, + u32 overlap_level, bool strict_size) noexcept; + +[[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, + bool strict_size) noexcept; + +[[nodiscard]] std::optional ResolveOverlap(const ImageInfo& new_info, + GPUVAddr gpu_addr, VAddr cpu_addr, + const ImageBase& overlap, + bool strict_size); + +[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs); + +[[nodiscard]] std::optional FindSubresource(const ImageInfo& candidate, + const ImageBase& image, + GPUVAddr candidate_addr, + RelaxedOptions options); + +[[nodiscard]] bool 
IsSubresource(const ImageInfo& candidate, const ImageBase& image, + GPUVAddr candidate_addr, RelaxedOptions options); + +void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, + const ImageBase* src); + +[[nodiscard]] u32 MapSizeBytes(const ImageBase& image); + +} // namespace VideoCommon diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 365bde2f1..acd5bdd78 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { return params; } -static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, +static void FillVoidExtentLDR(InputBitStream& strm, std::span outBuf, u32 blockWidth, u32 blockHeight) { // Don't actually care about the void extent, just read the bits... for (s32 i = 0; i < 4; ++i) { @@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block } } -static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { +static void FillError(std::span outBuf, u32 blockWidth, u32 blockHeight) { for (u32 j = 0; j < blockHeight; j++) { for (u32 i = 0; i < blockWidth; i++) { outBuf[j * blockWidth + i] = 0xFFFF00FF; @@ -1438,9 +1439,9 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues, #undef READ_INT_VALUES } -static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, - u32* outBuf) { - InputBitStream strm(inBuf); +static void DecompressBlock(std::span inBuf, const u32 blockWidth, + const u32 blockHeight, std::span outBuf) { + InputBitStream strm(inBuf.data()); TexelWeightParams weightParams = DecodeBlockInfo(strm); // Was there an error? @@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 } // Read the texel weight data.. 
- u8 texelWeightData[16]; - memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); + std::array texelWeightData; + std::ranges::copy(inBuf, texelWeightData.begin()); // Reverse everything for (u32 i = 0; i < 8; i++) { @@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 // Make sure that higher non-texel bits are set to zero const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; - texelWeightData[clearByteStart - 1] = - texelWeightData[clearByteStart - 1] & - static_cast((1 << (weightParams.GetPackedBitSize() % 8)) - 1); - memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); + if (clearByteStart > 0) { + texelWeightData[clearByteStart - 1] &= + static_cast((1 << (weightParams.GetPackedBitSize() % 8)) - 1); + } + std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U)); IntegerEncodedVector texelWeightValues; - InputBitStream weightStream(texelWeightData); + InputBitStream weightStream(texelWeightData.data()); DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, weightParams.GetNumWeightValues()); @@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 namespace Tegra::Texture::ASTC { -std::vector Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, - u32 block_height) { - u32 blockIdx = 0; +void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, + uint32_t block_width, uint32_t block_height, std::span output) { + u32 block_index = 0; std::size_t depth_offset = 0; - std::vector outData(height * width * depth * 4); - for (u32 k = 0; k < depth; k++) { - for (u32 j = 0; j < height; j += block_height) { - for (u32 i = 0; i < width; i += block_width) { - - const u8* blockPtr = data + blockIdx * 16; + for (u32 z = 0; z < depth; z++) { + for (u32 y = 0; y < height; y += block_height) { + for (u32 x = 0; x < width; x += block_width) { 
+ const std::span blockPtr{data.subspan(block_index * 16, 16)}; // Blocks can be at most 12x12 - u32 uncompData[144]; + std::array uncompData; ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); - u32 decompWidth = std::min(block_width, width - i); - u32 decompHeight = std::min(block_height, height - j); + u32 decompWidth = std::min(block_width, width - x); + u32 decompHeight = std::min(block_height, height - y); - u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; + const std::span outRow = output.subspan(depth_offset + (y * width + x) * 4); for (u32 jj = 0; jj < decompHeight; jj++) { - memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); + std::memcpy(outRow.data() + jj * width * 4, + uncompData.data() + jj * block_width, decompWidth * 4); } - - blockIdx++; + ++block_index; } } depth_offset += height * width * 4; } - - return outData; } } // namespace Tegra::Texture::ASTC diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 991cdba72..9105119bc 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -5,11 +5,10 @@ #pragma once #include -#include namespace Tegra::Texture::ASTC { -std::vector Decompress(const uint8_t* data, uint32_t width, uint32_t height, - uint32_t depth, uint32_t block_width, uint32_t block_height); +void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, + uint32_t block_width, uint32_t block_height, std::span output); } // namespace Tegra::Texture::ASTC diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp deleted file mode 100644 index bd1aebf02..000000000 --- a/src/video_core/textures/convert.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/surface.h" -#include "video_core/textures/astc.h" -#include "video_core/textures/convert.h" - -namespace Tegra::Texture { - -using VideoCore::Surface::PixelFormat; - -template -void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) { - union S8Z24 { - BitField<0, 24, u32> z24; - BitField<24, 8, u32> s8; - }; - static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); - - union Z24S8 { - BitField<0, 8, u32> s8; - BitField<8, 24, u32> z24; - }; - static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); - - S8Z24 s8z24_pixel{}; - Z24S8 z24s8_pixel{}; - constexpr auto bpp{ - VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)}; - for (std::size_t y = 0; y < height; ++y) { - for (std::size_t x = 0; x < width; ++x) { - const std::size_t offset{bpp * (y * width + x)}; - if constexpr (reverse) { - std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); - s8z24_pixel.s8.Assign(z24s8_pixel.s8); - s8z24_pixel.z24.Assign(z24s8_pixel.z24); - std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); - } else { - std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); - z24s8_pixel.s8.Assign(s8z24_pixel.s8); - z24s8_pixel.z24.Assign(s8z24_pixel.z24); - std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); - } - } - } -} - -static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) { - SwapS8Z24ToZ24S8(data, width, height); -} - -static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { - SwapS8Z24ToZ24S8(data, width, height); -} - -void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width, - u32 height, u32 depth, bool convert_astc, bool convert_s8z24) { - if (convert_astc && IsPixelFormatASTC(pixel_format)) { - // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. 
- u32 block_width{}; - u32 block_height{}; - std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); - const std::vector rgba8_data = Tegra::Texture::ASTC::Decompress( - in_data, width, height, depth, block_width, block_height); - std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); - - } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { - Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); - } -} - -void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, - bool convert_astc, bool convert_s8z24) { - if (convert_astc && IsPixelFormatASTC(pixel_format)) { - LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", - pixel_format); - UNREACHABLE(); - - } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { - Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height); - } -} - -} // namespace Tegra::Texture diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h deleted file mode 100644 index d5d6c77bb..000000000 --- a/src/video_core/textures/convert.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "common/common_types.h" - -namespace VideoCore::Surface { -enum class PixelFormat; -} - -namespace Tegra::Texture { - -void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format, - u32 width, u32 height, u32 depth, bool convert_astc, - bool convert_s8z24); - -void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, - u32 height, u32 depth, bool convert_astc, bool convert_s8z24); - -} // namespace Tegra::Texture diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 16d46a018..9f5181318 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -2,204 +2,111 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include +#include +#include + #include "common/alignment.h" #include "common/assert.h" #include "common/bit_util.h" +#include "common/div_ceil.h" #include "video_core/gpu.h" #include "video_core/textures/decoders.h" #include "video_core/textures/texture.h" namespace Tegra::Texture { -namespace { +namespace { /** - * This table represents the internal swizzle of a gob, - * in format 16 bytes x 2 sector packing. + * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing. * Calculates the offset of an (x, y) position within a swizzled texture. * Taken from the Tegra X1 Technical Reference Manual. 
pages 1187-1188 */ -template -struct alignas(64) SwizzleTable { - static_assert(M * Align == 64, "Swizzle Table does not align to GOB"); - constexpr SwizzleTable() { - for (u32 y = 0; y < N; ++y) { - for (u32 x = 0; x < M; ++x) { - const u32 x2 = x * Align; - values[y][x] = static_cast(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 + - ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16)); - } +constexpr SwizzleTable MakeSwizzleTableConst() { + SwizzleTable table{}; + for (u32 y = 0; y < table.size(); ++y) { + for (u32 x = 0; x < table[0].size(); ++x) { + table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + + (y % 2) * 16 + (x % 16); } } - const std::array& operator[](std::size_t index) const { - return values[index]; - } - std::array, N> values{}; -}; + return table; +} -constexpr u32 FAST_SWIZZLE_ALIGN = 16; +constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst(); -constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable(); -constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable(); +template +void Swizzle(std::span output, std::span input, u32 bytes_per_pixel, u32 width, + u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { + // The origin of the transformation can be configured here, leave it as zero as the current API + // doesn't expose it. + static constexpr u32 origin_x = 0; + static constexpr u32 origin_y = 0; + static constexpr u32 origin_z = 0; -/** - * This function manages ALL the GOBs(Group of Bytes) Inside a single block. - * Instead of going gob by gob, we map the coordinates inside a block and manage from - * those. Block_Width is assumed to be 1. 
- */ -void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, - const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, - const u32 y_end, const u32 z_end, const u32 tile_offset, - const u32 xy_block_size, const u32 layer_z, const u32 stride_x, - const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { - std::array data_ptrs; - u32 z_address = tile_offset; - - for (u32 z = z_start; z < z_end; z++) { - u32 y_address = z_address; - u32 pixel_base = layer_z * z + y_start * stride_x; - for (u32 y = y_start; y < y_end; y++) { - const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; - for (u32 x = x_start; x < x_end; x++) { - const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]}; - const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; - data_ptrs[unswizzle] = swizzled_data + swizzle_offset; - data_ptrs[!unswizzle] = unswizzled_data + pixel_index; - std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); - } - pixel_base += stride_x; - if ((y + 1) % GOB_SIZE_Y == 0) - y_address += GOB_SIZE; - } - z_address += xy_block_size; - } -} + // We can configure here a custom pitch + // As it's not exposed 'width * bpp' will be the expected pitch. + const u32 pitch = width * bytes_per_pixel; + const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel; -/** - * This function manages ALL the GOBs(Group of Bytes) Inside a single block. - * Instead of going gob by gob, we map the coordinates inside a block and manage from - * those. Block_Width is assumed to be 1. 
- */ -void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, - const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, - const u32 y_end, const u32 z_end, const u32 tile_offset, - const u32 xy_block_size, const u32 layer_z, const u32 stride_x, - const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { - std::array data_ptrs; - u32 z_address = tile_offset; - const u32 x_startb = x_start * bytes_per_pixel; - const u32 x_endb = x_end * bytes_per_pixel; - - for (u32 z = z_start; z < z_end; z++) { - u32 y_address = z_address; - u32 pixel_base = layer_z * z + y_start * stride_x; - for (u32 y = y_start; y < y_end; y++) { - const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y]; - for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) { - const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]}; - const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; - const u32 pixel_index{out_x + pixel_base}; - data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; - data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; - std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN); - } - pixel_base += stride_x; - if ((y + 1) % GOB_SIZE_Y == 0) - y_address += GOB_SIZE; - } - z_address += xy_block_size; - } -} + const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); + const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); + const u32 slice_size = + Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size; -/** - * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue. - * The body of this function takes care of splitting the swizzled texture into blocks, - * and managing the extents of it. Once all the parameters of a single block are obtained, - * the function calls 'ProcessBlock' to process that particular Block. 
- * - * Documentation for the memory layout and decoding can be found at: - * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces - */ -template -void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, - const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, - const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth, - const u32 width_spacing) { - auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; - const u32 stride_x = width * out_bytes_per_pixel; - const u32 layer_z = height * stride_x; - const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; - constexpr u32 gob_elements_y = GOB_SIZE_Y; - constexpr u32 gob_elements_z = GOB_SIZE_Z; - const u32 block_x_elements = gob_elements_x; - const u32 block_y_elements = gob_elements_y * block_height; - const u32 block_z_elements = gob_elements_z * block_depth; - const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing); - const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); - const u32 blocks_on_y = div_ceil(height, block_y_elements); - const u32 blocks_on_z = div_ceil(depth, block_z_elements); - const u32 xy_block_size = GOB_SIZE * block_height; - const u32 block_size = xy_block_size * block_depth; - u32 tile_offset = 0; - for (u32 zb = 0; zb < blocks_on_z; zb++) { - const u32 z_start = zb * block_z_elements; - const u32 z_end = std::min(depth, z_start + block_z_elements); - for (u32 yb = 0; yb < blocks_on_y; yb++) { - const u32 y_start = yb * block_y_elements; - const u32 y_end = std::min(height, y_start + block_y_elements); - for (u32 xb = 0; xb < blocks_on_x; xb++) { - const u32 x_start = xb * block_x_elements; - const u32 x_end = std::min(width, x_start + block_x_elements); - if constexpr (fast) { - FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, - z_start, x_end, y_end, z_end, tile_offset, xy_block_size, - 
layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); - } else { - PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, - z_start, x_end, y_end, z_end, tile_offset, xy_block_size, - layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); - } - tile_offset += block_size; + const u32 block_height_mask = (1U << block_height) - 1; + const u32 block_depth_mask = (1U << block_depth) - 1; + const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth; + + for (u32 slice = 0; slice < depth; ++slice) { + const u32 z = slice + origin_z; + const u32 offset_z = (z >> block_depth) * slice_size + + ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height)); + for (u32 line = 0; line < height; ++line) { + const u32 y = line + origin_y; + const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; + + const u32 block_y = y >> GOB_SIZE_Y_SHIFT; + const u32 offset_y = (block_y >> block_height) * block_size + + ((block_y & block_height_mask) << GOB_SIZE_SHIFT); + + for (u32 column = 0; column < width; ++column) { + const u32 x = (column + origin_x) * bytes_per_pixel; + const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift; + + const u32 base_swizzled_offset = offset_z + offset_y + offset_x; + const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X]; + + const u32 unswizzled_offset = + slice * pitch * height + line * pitch + column * bytes_per_pixel; + + u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; + const u8* const src = &input[TO_LINEAR ? 
unswizzled_offset : swizzled_offset]; + std::memcpy(dst, src, bytes_per_pixel); } } } } - } // Anonymous namespace -void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, - u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, - bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { - const u32 block_height_size{1U << block_height}; - const u32 block_depth_size{1U << block_depth}; - if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) { - SwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, depth, - bytes_per_pixel, out_bytes_per_pixel, block_height_size, - block_depth_size, width_spacing); - } else { - SwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, depth, - bytes_per_pixel, out_bytes_per_pixel, block_height_size, - block_depth_size, width_spacing); - } +SwizzleTable MakeSwizzleTable() { + return SWIZZLE_TABLE; } -void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, - u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, - u32 block_depth, u32 width_spacing) { - CopySwizzledData((width + tile_size_x - 1) / tile_size_x, - (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, - bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth, - width_spacing); +void UnswizzleTexture(std::span output, std::span input, u32 bytes_per_pixel, + u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment) { + Swizzle(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, + stride_alignment); } -std::vector UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, - u32 width, u32 height, u32 depth, u32 block_height, - u32 block_depth, u32 width_spacing) { - std::vector unswizzled_data(width * height * depth * bytes_per_pixel); - UnswizzleTexture(unswizzled_data.data(), 
address, tile_size_x, tile_size_y, bytes_per_pixel, - width, height, depth, block_height, block_depth, width_spacing); - return unswizzled_data; +void SwizzleTexture(std::span output, std::span input, u32 bytes_per_pixel, u32 width, + u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment) { + Swizzle(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, + stride_alignment); } void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, @@ -213,7 +120,7 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 const u32 gob_address_y = (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; - const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; + const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 dst_x = x + offset_x; const u32 gob_address = @@ -235,11 +142,11 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); const u32 block_height_mask = (1U << block_height) - 1; - const u32 x_shift = static_cast(GOB_SIZE_SHIFT) + block_height; + const u32 x_shift = GOB_SIZE_SHIFT + block_height; for (u32 line = 0; line < line_count; ++line) { const u32 src_y = line + origin_y; - const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; + const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; const u32 src_offset_y = (block_y >> block_height) * block_size + @@ -270,7 +177,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt const u32 x_shift = static_cast(GOB_SIZE_SHIFT) + block_height + block_depth; for (u32 line = 0; line < line_count; ++line) { - const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; + const auto& 
table = SWIZZLE_TABLE[line % GOB_SIZE_Y]; const u32 block_y = line / GOB_SIZE_Y; const u32 dst_offset_y = (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; @@ -293,7 +200,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 const std::size_t gob_address_y = (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; - const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; + const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { const std::size_t gob_address = gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 01e156bc8..d7cdc81e8 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -4,7 +4,8 @@ #pragma once -#include +#include + #include "common/common_types.h" #include "video_core/textures/texture.h" @@ -15,28 +16,25 @@ constexpr u32 GOB_SIZE_Y = 8; constexpr u32 GOB_SIZE_Z = 1; constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; -constexpr std::size_t GOB_SIZE_X_SHIFT = 6; -constexpr std::size_t GOB_SIZE_Y_SHIFT = 3; -constexpr std::size_t GOB_SIZE_Z_SHIFT = 0; -constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; - -/// Unswizzles a swizzled texture without changing its format. -void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, - u32 bytes_per_pixel, u32 width, u32 height, u32 depth, - u32 block_height = TICEntry::DefaultBlockHeight, - u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); - -/// Unswizzles a swizzled texture without changing its format. 
-std::vector UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, - u32 width, u32 height, u32 depth, - u32 block_height = TICEntry::DefaultBlockHeight, - u32 block_depth = TICEntry::DefaultBlockHeight, - u32 width_spacing = 0); - -/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. -void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, - u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, - bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); +constexpr u32 GOB_SIZE_X_SHIFT = 6; +constexpr u32 GOB_SIZE_Y_SHIFT = 3; +constexpr u32 GOB_SIZE_Z_SHIFT = 0; +constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; + +using SwizzleTable = std::array, GOB_SIZE_Y>; + +/// Returns a z-order swizzle table +SwizzleTable MakeSwizzleTable(); + +/// Unswizzles a block linear texture into linear memory. +void UnswizzleTexture(std::span output, std::span input, u32 bytes_per_pixel, + u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment = 1); + +/// Swizzles linear memory into a block linear texture. +void SwizzleTexture(std::span output, std::span input, u32 bytes_per_pixel, u32 width, + u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment = 1); /// This function calculates the correct size of a texture depending if it's tiled or not. 
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index 4171e3ef2..ae5621a7d 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp @@ -5,9 +5,13 @@ #include #include +#include "common/cityhash.h" #include "core/settings.h" #include "video_core/textures/texture.h" +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; + namespace Tegra::Texture { namespace { @@ -65,7 +69,7 @@ unsigned SettingsMinimumAnisotropy() noexcept { } // Anonymous namespace -std::array TSCEntry::GetBorderColor() const noexcept { +std::array TSCEntry::BorderColor() const noexcept { if (!srgb_conversion) { return border_color; } @@ -73,8 +77,16 @@ std::array TSCEntry::GetBorderColor() const noexcept { SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; } -float TSCEntry::GetMaxAnisotropy() const noexcept { +float TSCEntry::MaxAnisotropy() const noexcept { return static_cast(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); } } // namespace Tegra::Texture + +size_t std::hash::operator()(const TICEntry& tic) const noexcept { + return Common::CityHash64(reinterpret_cast(&tic), sizeof tic); +} + +size_t std::hash::operator()(const TSCEntry& tsc) const noexcept { + return Common::CityHash64(reinterpret_cast(&tsc), sizeof tsc); +} diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index bbc7e3eaf..c1d14335e 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -53,27 +53,27 @@ enum class TextureFormat : u32 { BC4 = 0x27, BC5 = 0x28, S8D24 = 0x29, - X8Z24 = 0x2a, + X8D24 = 0x2a, D24S8 = 0x2b, - X4V4Z24__COV4R4V = 0x2c, - X4V4Z24__COV8R8V = 0x2d, - V8Z24__COV4R12V = 0x2e, + X4V4D24__COV4R4V = 0x2c, + X4V4D24__COV8R8V = 0x2d, + V8D24__COV4R12V = 0x2e, D32 = 0x2f, D32S8 = 0x30, - X8Z24_X20V4S8__COV4R4V = 0x31, - X8Z24_X20V4S8__COV8R8V = 0x32, 
- ZF32_X20V4X8__COV4R4V = 0x33, - ZF32_X20V4X8__COV8R8V = 0x34, - ZF32_X20V4S8__COV4R4V = 0x35, - ZF32_X20V4S8__COV8R8V = 0x36, - X8Z24_X16V8S8__COV4R12V = 0x37, - ZF32_X16V8X8__COV4R12V = 0x38, - ZF32_X16V8S8__COV4R12V = 0x39, + X8D24_X20V4S8__COV4R4V = 0x31, + X8D24_X20V4S8__COV8R8V = 0x32, + D32_X20V4X8__COV4R4V = 0x33, + D32_X20V4X8__COV8R8V = 0x34, + D32_X20V4S8__COV4R4V = 0x35, + D32_X20V4S8__COV8R8V = 0x36, + X8D24_X16V8S8__COV4R12V = 0x37, + D32_X16V8X8__COV4R12V = 0x38, + D32_X16V8S8__COV4R12V = 0x39, D16 = 0x3a, - V8Z24__COV8R24V = 0x3b, - X8Z24_X16V8S8__COV8R24V = 0x3c, - ZF32_X16V8X8__COV8R24V = 0x3d, - ZF32_X16V8S8__COV8R24V = 0x3e, + V8D24__COV8R24V = 0x3b, + X8D24_X16V8S8__COV8R24V = 0x3c, + D32_X16V8X8__COV8R24V = 0x3d, + D32_X16V8S8__COV8R24V = 0x3e, ASTC_2D_4X4 = 0x40, ASTC_2D_5X5 = 0x41, ASTC_2D_6X6 = 0x42, @@ -146,7 +146,7 @@ enum class MsaaMode : u32 { }; union TextureHandle { - /* implicit */ TextureHandle(u32 raw_) : raw{raw_} {} + /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {} u32 raw; BitField<0, 20, u32> tic_id; @@ -155,124 +155,124 @@ union TextureHandle { static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); struct TICEntry { - static constexpr u32 DefaultBlockHeight = 16; - static constexpr u32 DefaultBlockDepth = 1; - - union { - u32 raw; - BitField<0, 7, TextureFormat> format; - BitField<7, 3, ComponentType> r_type; - BitField<10, 3, ComponentType> g_type; - BitField<13, 3, ComponentType> b_type; - BitField<16, 3, ComponentType> a_type; - - BitField<19, 3, SwizzleSource> x_source; - BitField<22, 3, SwizzleSource> y_source; - BitField<25, 3, SwizzleSource> z_source; - BitField<28, 3, SwizzleSource> w_source; - }; - u32 address_low; union { - BitField<0, 16, u32> address_high; - BitField<21, 3, TICHeaderVersion> header_version; - }; - union { - BitField<0, 3, u32> block_width; - BitField<3, 3, u32> block_height; - BitField<6, 3, u32> block_depth; + struct { + union { + BitField<0, 7, TextureFormat> 
format; + BitField<7, 3, ComponentType> r_type; + BitField<10, 3, ComponentType> g_type; + BitField<13, 3, ComponentType> b_type; + BitField<16, 3, ComponentType> a_type; + + BitField<19, 3, SwizzleSource> x_source; + BitField<22, 3, SwizzleSource> y_source; + BitField<25, 3, SwizzleSource> z_source; + BitField<28, 3, SwizzleSource> w_source; + }; + u32 address_low; + union { + BitField<0, 16, u32> address_high; + BitField<16, 5, u32> layer_base_3_7; + BitField<21, 3, TICHeaderVersion> header_version; + BitField<24, 1, u32> load_store_hint; + BitField<25, 4, u32> view_coherency_hash; + BitField<29, 3, u32> layer_base_8_10; + }; + union { + BitField<0, 3, u32> block_width; + BitField<3, 3, u32> block_height; + BitField<6, 3, u32> block_depth; - BitField<10, 3, u32> tile_width_spacing; + BitField<10, 3, u32> tile_width_spacing; - // High 16 bits of the pitch value - BitField<0, 16, u32> pitch_high; - BitField<26, 1, u32> use_header_opt_control; - BitField<27, 1, u32> depth_texture; - BitField<28, 4, u32> max_mip_level; + // High 16 bits of the pitch value + BitField<0, 16, u32> pitch_high; + BitField<26, 1, u32> use_header_opt_control; + BitField<27, 1, u32> depth_texture; + BitField<28, 4, u32> max_mip_level; - BitField<0, 16, u32> buffer_high_width_minus_one; - }; - union { - BitField<0, 16, u32> width_minus_1; - BitField<22, 1, u32> srgb_conversion; - BitField<23, 4, TextureType> texture_type; - BitField<29, 3, u32> border_size; + BitField<0, 16, u32> buffer_high_width_minus_one; + }; + union { + BitField<0, 16, u32> width_minus_one; + BitField<16, 3, u32> layer_base_0_2; + BitField<22, 1, u32> srgb_conversion; + BitField<23, 4, TextureType> texture_type; + BitField<29, 3, u32> border_size; - BitField<0, 16, u32> buffer_low_width_minus_one; - }; - union { - BitField<0, 16, u32> height_minus_1; - BitField<16, 14, u32> depth_minus_1; - }; - union { - BitField<6, 13, u32> mip_lod_bias; - BitField<27, 3, u32> max_anisotropy; + BitField<0, 16, u32> 
buffer_low_width_minus_one; + }; + union { + BitField<0, 16, u32> height_minus_1; + BitField<16, 14, u32> depth_minus_1; + BitField<30, 1, u32> is_sparse; + BitField<31, 1, u32> normalized_coords; + }; + union { + BitField<6, 13, u32> mip_lod_bias; + BitField<27, 3, u32> max_anisotropy; + }; + union { + BitField<0, 4, u32> res_min_mip_level; + BitField<4, 4, u32> res_max_mip_level; + BitField<8, 4, MsaaMode> msaa_mode; + BitField<12, 12, u32> min_lod_clamp; + }; + }; + std::array raw; }; - union { - BitField<0, 4, u32> res_min_mip_level; - BitField<4, 4, u32> res_max_mip_level; - BitField<8, 4, MsaaMode> msaa_mode; - BitField<12, 12, u32> min_lod_clamp; - }; + constexpr bool operator==(const TICEntry& rhs) const noexcept { + return raw == rhs.raw; + } - GPUVAddr Address() const { + constexpr bool operator!=(const TICEntry& rhs) const noexcept { + return raw != rhs.raw; + } + + constexpr GPUVAddr Address() const { return static_cast((static_cast(address_high) << 32) | address_low); } - u32 Pitch() const { + constexpr u32 Pitch() const { ASSERT(header_version == TICHeaderVersion::Pitch || header_version == TICHeaderVersion::PitchColorKey); // The pitch value is 21 bits, and is 32B aligned. 
return pitch_high << 5; } - u32 Width() const { + constexpr u32 Width() const { if (header_version != TICHeaderVersion::OneDBuffer) { - return width_minus_1 + 1; + return width_minus_one + 1; } - return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1; + return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1; } - u32 Height() const { + constexpr u32 Height() const { return height_minus_1 + 1; } - u32 Depth() const { + constexpr u32 Depth() const { return depth_minus_1 + 1; } - u32 BlockWidth() const { - ASSERT(IsTiled()); - return block_width; - } - - u32 BlockHeight() const { - ASSERT(IsTiled()); - return block_height; - } - - u32 BlockDepth() const { - ASSERT(IsTiled()); - return block_depth; + constexpr u32 BaseLayer() const { + return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8; } - bool IsTiled() const { + constexpr bool IsBlockLinear() const { return header_version == TICHeaderVersion::BlockLinear || header_version == TICHeaderVersion::BlockLinearColorKey; } - bool IsLineal() const { + constexpr bool IsPitchLinear() const { return header_version == TICHeaderVersion::Pitch || header_version == TICHeaderVersion::PitchColorKey; } - bool IsBuffer() const { + constexpr bool IsBuffer() const { return header_version == TICHeaderVersion::OneDBuffer; } - - bool IsSrgbConversionEnabled() const { - return srgb_conversion != 0; - } }; static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); @@ -309,6 +309,12 @@ enum class TextureMipmapFilter : u32 { Linear = 3, }; +enum class SamplerReduction : u32 { + WeightedAverage = 0, + Min = 1, + Max = 2, +}; + enum class Anisotropy { Default, Filter2x, @@ -333,8 +339,12 @@ struct TSCEntry { BitField<0, 2, TextureFilter> mag_filter; BitField<4, 2, TextureFilter> min_filter; BitField<6, 2, TextureMipmapFilter> mipmap_filter; + BitField<8, 1, u32> cubemap_anisotropy; BitField<9, 1, u32> cubemap_interface_filtering; + BitField<10, 2, SamplerReduction> 
reduction_filter; BitField<12, 13, u32> mip_lod_bias; + BitField<25, 1, u32> float_coord_normalization; + BitField<26, 5, u32> trilin_opt; }; union { BitField<0, 12, u32> min_lod_clamp; @@ -347,32 +357,45 @@ struct TSCEntry { }; std::array border_color; }; - std::array raw; + std::array raw; }; - std::array GetBorderColor() const noexcept; + constexpr bool operator==(const TSCEntry& rhs) const noexcept { + return raw == rhs.raw; + } + + constexpr bool operator!=(const TSCEntry& rhs) const noexcept { + return raw != rhs.raw; + } + + std::array BorderColor() const noexcept; - float GetMaxAnisotropy() const noexcept; + float MaxAnisotropy() const noexcept; - float GetMinLod() const { + float MinLod() const { return static_cast(min_lod_clamp) / 256.0f; } - float GetMaxLod() const { + float MaxLod() const { return static_cast(max_lod_clamp) / 256.0f; } - float GetLodBias() const { + float LodBias() const { // Sign extend the 13-bit value. - constexpr u32 mask = 1U << (13 - 1); + static constexpr u32 mask = 1U << (13 - 1); return static_cast(static_cast((mip_lod_bias ^ mask) - mask)) / 256.0f; } }; static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); -struct FullTextureInfo { - TICEntry tic; - TSCEntry tsc; +} // namespace Tegra::Texture + +template <> +struct std::hash { + size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept; }; -} // namespace Tegra::Texture +template <> +struct std::hash { + size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept; +}; -- cgit v1.2.3 From f0d9ab0717b5148b5d3569af96333ca69bc46272 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 30 Dec 2020 13:24:59 -0500 Subject: maxwell_to_vk: Initialize usage variable in SurfaceFormat() Silences a -Wmaybe-uninitialized warning --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp 
b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 40501e7fa..4c988429f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -239,7 +239,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo const bool attachable = tuple.usage & Attachable; const bool storage = tuple.usage & Storage; - VkFormatFeatureFlags usage; + VkFormatFeatureFlags usage{}; switch (format_type) { case FormatType::Buffer: usage = -- cgit v1.2.3 From bcafef4b941bd6ad33f1206a3029da0ae2bc3507 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 30 Dec 2020 17:59:42 -0500 Subject: half_set: Resolve -Wmaybe-uninitialized warnings --- src/video_core/shader/decode/half_set.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index b2e88fa20..fa83108cd 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -22,13 +22,13 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - PredCondition cond; - bool bf; - bool ftz; - bool neg_a; - bool abs_a; - bool neg_b; - bool abs_b; + PredCondition cond{}; + bool bf = false; + bool ftz = false; + bool neg_a = false; + bool abs_a = false; + bool neg_b = false; + bool abs_b = false; switch (opcode->get().GetId()) { case OpCode::Id::HSET2_C: case OpCode::Id::HSET2_IMM: -- cgit v1.2.3 From d93742142243dea1355012b9f0ce7f5ac8a2dc02 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Dec 2020 21:24:34 -0300 Subject: vulkan_common: Move dynamic library load to a separate file Allows us to initialize a Vulkan dynamic library from different backends without duplicating code. 
--- src/video_core/CMakeLists.txt | 2 ++ src/video_core/renderer_vulkan/renderer_vulkan.cpp | 39 +++++----------------- src/video_core/vulkan_common/vulkan_library.cpp | 36 ++++++++++++++++++++ src/video_core/vulkan_common/vulkan_library.h | 13 ++++++++ 4 files changed, 59 insertions(+), 31 deletions(-) create mode 100644 src/video_core/vulkan_common/vulkan_library.cpp create mode 100644 src/video_core/vulkan_common/vulkan_library.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 948e167c3..d967fe07b 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -260,6 +260,8 @@ add_library(video_core STATIC textures/texture.h video_core.cpp video_core.h + vulkan_common/vulkan_library.cpp + vulkan_common/vulkan_library.h ) create_target_directory_groups(video_core) diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7f521cb9b..7a34c95ab 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -12,8 +12,6 @@ #include -#include "common/dynamic_library.h" -#include "common/file_util.h" #include "common/logging/log.h" #include "common/telemetry.h" #include "core/core.h" @@ -32,6 +30,7 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_library.h" // Include these late to avoid polluting previous headers #ifdef _WIN32 @@ -70,31 +69,10 @@ VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, return VK_FALSE; } -Common::DynamicLibrary OpenVulkanLibrary() { - Common::DynamicLibrary library; -#ifdef __APPLE__ - // Check if a path to a specific Vulkan library has been specified. 
- char* libvulkan_env = getenv("LIBVULKAN_PATH"); - if (!libvulkan_env || !library.Open(libvulkan_env)) { - // Use the libvulkan.dylib from the application bundle. - const std::string filename = - Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; - library.Open(filename.c_str()); - } -#else - std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1); - if (!library.Open(filename.c_str())) { - // Android devices may not have libvulkan.so.1, only libvulkan.so. - filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); - (void)library.Open(filename.c_str()); - } -#endif - return library; -} - -std::pair CreateInstance(Common::DynamicLibrary& library, - vk::InstanceDispatch& dld, WindowSystemType window_type, - bool enable_debug_utils, bool enable_layers) { +std::pair CreateInstance( + Common::DynamicLibrary& library, vk::InstanceDispatch& dld, + WindowSystemType window_type = WindowSystemType::Headless, bool enable_debug_utils = false, + bool enable_layers = false) { if (!library.IsOpen()) { LOG_ERROR(Render_Vulkan, "Vulkan library not available"); return {}; @@ -285,7 +263,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { } bool RendererVulkan::Init() { - library = OpenVulkanLibrary(); + library = OpenLibrary(); std::tie(instance, instance_version) = CreateInstance( library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug); if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { @@ -446,9 +424,8 @@ void RendererVulkan::Report() const { std::vector RendererVulkan::EnumerateDevices() { vk::InstanceDispatch dld; - Common::DynamicLibrary library = OpenVulkanLibrary(); - vk::Instance instance = - CreateInstance(library, dld, WindowSystemType::Headless, false, false).first; + Common::DynamicLibrary library = OpenLibrary(); + vk::Instance instance = CreateInstance(library, dld).first; if (!instance) { return {}; } diff --git 
a/src/video_core/vulkan_common/vulkan_library.cpp b/src/video_core/vulkan_common/vulkan_library.cpp new file mode 100644 index 000000000..27c958221 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_library.cpp @@ -0,0 +1,36 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "common/dynamic_library.h" +#include "common/file_util.h" +#include "video_core/vulkan_common/vulkan_library.h" + +namespace Vulkan { + +Common::DynamicLibrary OpenLibrary() { + Common::DynamicLibrary library; +#ifdef __APPLE__ + // Check if a path to a specific Vulkan library has been specified. + char* const libvulkan_env = std::getenv("LIBVULKAN_PATH"); + if (!libvulkan_env || !library.Open(libvulkan_env)) { + // Use the libvulkan.dylib from the application bundle. + const std::string filename = + Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; + library.Open(filename.c_str()); + } +#else + std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1); + if (!library.Open(filename.c_str())) { + // Android devices may not have libvulkan.so.1, only libvulkan.so. + filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); + void(library.Open(filename.c_str())); + } +#endif + return library; +} + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_library.h b/src/video_core/vulkan_common/vulkan_library.h new file mode 100644 index 000000000..8b28b0e17 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_library.h @@ -0,0 +1,13 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/dynamic_library.h" + +namespace Vulkan { + +Common::DynamicLibrary OpenLibrary(); + +} // namespace Vulkan -- cgit v1.2.3 From d1435009ed914cc50533a71b1e25132376c28586 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Dec 2020 21:30:11 -0300 Subject: vulkan_common: Rename renderer_vulkan/wrapper.h to vulkan_common/vulkan_wrapper.h Allows sharing Vulkan wrapper code between different rendering backends. --- src/video_core/CMakeLists.txt | 4 +- src/video_core/renderer_vulkan/blit_image.cpp | 2 +- src/video_core/renderer_vulkan/blit_image.h | 2 +- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 2 +- src/video_core/renderer_vulkan/maxwell_to_vk.h | 2 +- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 2 +- src/video_core/renderer_vulkan/renderer_vulkan.h | 2 +- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 2 +- src/video_core/renderer_vulkan/vk_blit_screen.h | 2 +- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_buffer_cache.h | 2 +- src/video_core/renderer_vulkan/vk_command_pool.cpp | 2 +- src/video_core/renderer_vulkan/vk_command_pool.h | 2 +- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 2 +- src/video_core/renderer_vulkan/vk_compute_pass.h | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 2 +- .../renderer_vulkan/vk_compute_pipeline.h | 2 +- .../renderer_vulkan/vk_descriptor_pool.cpp | 2 +- .../renderer_vulkan/vk_descriptor_pool.h | 2 +- src/video_core/renderer_vulkan/vk_device.cpp | 2 +- src/video_core/renderer_vulkan/vk_device.h | 2 +- .../renderer_vulkan/vk_fence_manager.cpp | 2 +- src/video_core/renderer_vulkan/vk_fence_manager.h | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.h | 2 +- .../renderer_vulkan/vk_master_semaphore.cpp | 2 +- .../renderer_vulkan/vk_master_semaphore.h | 2 +- .../renderer_vulkan/vk_memory_manager.cpp | 2 +- src/video_core/renderer_vulkan/vk_memory_manager.h | 2 +- 
.../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 2 +- src/video_core/renderer_vulkan/vk_query_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_query_cache.h | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.h | 2 +- src/video_core/renderer_vulkan/vk_scheduler.cpp | 2 +- src/video_core/renderer_vulkan/vk_scheduler.h | 2 +- src/video_core/renderer_vulkan/vk_shader_util.cpp | 2 +- src/video_core/renderer_vulkan/vk_shader_util.h | 2 +- .../renderer_vulkan/vk_staging_buffer_pool.cpp | 2 +- .../renderer_vulkan/vk_staging_buffer_pool.h | 2 +- .../renderer_vulkan/vk_stream_buffer.cpp | 2 +- src/video_core/renderer_vulkan/vk_stream_buffer.h | 2 +- src/video_core/renderer_vulkan/vk_swapchain.cpp | 2 +- src/video_core/renderer_vulkan/vk_swapchain.h | 2 +- .../renderer_vulkan/vk_texture_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_texture_cache.h | 2 +- .../renderer_vulkan/vk_update_descriptor.cpp | 2 +- .../renderer_vulkan/vk_update_descriptor.h | 2 +- src/video_core/renderer_vulkan/wrapper.cpp | 928 --------------- src/video_core/renderer_vulkan/wrapper.h | 1213 -------------------- src/video_core/vulkan_common/vulkan_wrapper.cpp | 928 +++++++++++++++ src/video_core/vulkan_common/vulkan_wrapper.h | 1213 ++++++++++++++++++++ 53 files changed, 2191 insertions(+), 2191 deletions(-) delete mode 100644 src/video_core/renderer_vulkan/wrapper.cpp delete mode 100644 src/video_core/renderer_vulkan/wrapper.h create mode 100644 src/video_core/vulkan_common/vulkan_wrapper.cpp create mode 100644 src/video_core/vulkan_common/vulkan_wrapper.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index d967fe07b..e19632bb1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -170,8 +170,6 @@ add_library(video_core STATIC renderer_vulkan/vk_texture_cache.h renderer_vulkan/vk_update_descriptor.cpp 
renderer_vulkan/vk_update_descriptor.h - renderer_vulkan/wrapper.cpp - renderer_vulkan/wrapper.h shader_cache.h shader_notify.cpp shader_notify.h @@ -262,6 +260,8 @@ add_library(video_core STATIC video_core.h vulkan_common/vulkan_library.cpp vulkan_common/vulkan_library.h + vulkan_common/vulkan_wrapper.cpp + vulkan_common/vulkan_wrapper.h ) create_target_directory_groups(video_core) diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 87c8e5693..504492cac 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -17,8 +17,8 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 2c2790bf9..1a4f66336 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -8,8 +8,8 @@ #include "video_core/engines/fermi_2d.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/texture_cache/types.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 4c988429f..ed4fce714 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -10,8 +10,8 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" 
+#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan::MaxwellToVK { diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 1a90f192e..8cf5aa711 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -7,9 +7,9 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" #include "video_core/textures/texture.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan::MaxwellToVK { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7a34c95ab..6e267f89d 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -29,8 +29,8 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/vulkan_common/vulkan_library.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" // Include these late to avoid polluting previous headers #ifdef _WIN32 diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 74642fba4..4a0abfaad 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -11,7 +11,7 @@ #include "common/dynamic_library.h" #include "video_core/renderer_base.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core { class TelemetrySession; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index d3a83f22f..a205cd151 
100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -27,9 +27,9 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_swapchain.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" #include "video_core/textures/decoders.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 2ee374247..cc56c4560 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -7,7 +7,7 @@ #include #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core { class System; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 10d296c2f..79131f819 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -12,7 +12,7 @@ #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index daf498222..3ab77a00b 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -11,7 +11,7 @@ #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" -#include 
"video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp index 8f7d6410e..ccae04929 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp @@ -6,7 +6,7 @@ #include "video_core/renderer_vulkan/vk_command_pool.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h index 62a7ce3f1..ce0e34515 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.h +++ b/src/video_core/renderer_vulkan/vk_command_pool.h @@ -8,7 +8,7 @@ #include #include "video_core/renderer_vulkan/vk_resource_pool.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 2c030e910..5d4543bae 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -19,7 +19,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index abdf61e2c..1b7502a4f 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -11,7 +11,7 @@ #include "common/common_types.h" #include 
"video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 62f44d6da..9966dd14a 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -11,7 +11,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 49e2113a2..a7197536c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -7,7 +7,7 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index f38e089d5..4dea03239 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -9,7 +9,7 @@ #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git 
a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index 544f32a20..2abcaeddd 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -7,7 +7,7 @@ #include #include "video_core/renderer_vulkan/vk_resource_pool.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 370a63f74..f3dd6eae1 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -14,7 +14,7 @@ #include "common/assert.h" #include "core/settings.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 995dcfc0f..9673f47c7 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -11,7 +11,7 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/nsight_aftermath_tracker.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 774a12a53..cd044c187 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -10,7 +10,7 @@ #include "video_core/renderer_vulkan/vk_fence_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff 
--git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index c2869e8e3..272ae6d29 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -9,7 +9,7 @@ #include "video_core/fence_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core { class System; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 7979df3a8..d9c1ed553 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -17,7 +17,7 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 214d06b4c..3bc93bc2a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -13,7 +13,7 @@ #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index ae26e558d..ed6ea0805 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ 
b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -8,7 +8,7 @@ #include "core/settings.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 0e93706d7..747d2f3bc 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -8,7 +8,7 @@ #include #include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 56b24b70f..35f859f77 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -13,7 +13,7 @@ #include "common/logging/log.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 318f8b43e..20463ecad 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -9,7 +9,7 @@ #include #include #include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 083796d05..b44fd6159 100644 --- 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -25,11 +25,11 @@ #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/compiler_settings.h" #include "video_core/shader/memory_util.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index fbaa8257c..5ce1b17f3 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -20,12 +20,12 @@ #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/async_shaders.h" #include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core { class System; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 038760de3..7852178b6 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -11,7 +11,7 @@ #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git 
a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index 837fe9ebf..b4fb6b3b0 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -12,7 +12,7 @@ #include "common/common_types.h" #include "video_core/query_cache.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace VideoCore { class RasterizerInterface; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 04c5c859c..1c174e7ec 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,9 +36,9 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader_cache.h" #include "video_core/texture_cache/texture_cache.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 990f9e031..7b9ec3bb8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -29,8 +29,8 @@ #include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/async_shaders.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core { class System; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index c104c6fe3..f7b79e74c 100644 --- 
a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -17,7 +17,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 0a36c8fad..1172ec622 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -12,7 +12,7 @@ #include #include "common/common_types.h" #include "common/threadsafe_queue.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index 38a0be7f2..630306077 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -9,7 +9,7 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_shader_util.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index dce34a140..98ee5e668 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h @@ -7,7 +7,7 @@ #include #include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 
2fd3b7f39..e5155e886 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -12,7 +12,7 @@ #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 2dd5049ac..97ed1118a 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -10,7 +10,7 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 419cb154d..aae50bf25 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -13,7 +13,7 @@ #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 1428f77bf..aebd68728 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -9,7 +9,7 @@ #include #include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git 
a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 9636a7c65..458aa4532 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -14,7 +14,7 @@ #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_swapchain.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 6b39befdf..25eb20832 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -7,7 +7,7 @@ #include #include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Layout { struct FramebufferLayout; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 261808391..e04dd23ef 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -14,7 +14,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index edc3d80c0..576515bcc 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -8,8 +8,8 @@ #include #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/wrapper.h" #include 
"video_core/texture_cache/texture_cache.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 8826da325..c0603ac22 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -10,7 +10,7 @@ #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index f098a8540..d0ae49010 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -8,7 +8,7 @@ #include #include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp deleted file mode 100644 index 2a21e850d..000000000 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ /dev/null @@ -1,928 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "common/logging/log.h" - -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan::vk { - -namespace { - -template -void SortPhysicalDevices(std::vector& devices, const InstanceDispatch& dld, - Func&& func) { - // Calling GetProperties calls Vulkan more than needed. But they are supposed to be cheap - // functions. 
- std::stable_sort(devices.begin(), devices.end(), - [&dld, &func](VkPhysicalDevice lhs, VkPhysicalDevice rhs) { - return func(vk::PhysicalDevice(lhs, dld).GetProperties(), - vk::PhysicalDevice(rhs, dld).GetProperties()); - }); -} - -void SortPhysicalDevicesPerVendor(std::vector& devices, - const InstanceDispatch& dld, - std::initializer_list vendor_ids) { - for (auto it = vendor_ids.end(); it != vendor_ids.begin();) { - --it; - SortPhysicalDevices(devices, dld, [id = *it](const auto& lhs, const auto& rhs) { - return lhs.vendorID == id && rhs.vendorID != id; - }); - } -} - -void SortPhysicalDevices(std::vector& devices, const InstanceDispatch& dld) { - // Sort by name, this will set a base and make GPUs with higher numbers appear first - // (e.g. GTX 1650 will intentionally be listed before a GTX 1080). - SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) { - return std::string_view{lhs.deviceName} > std::string_view{rhs.deviceName}; - }); - // Prefer discrete over non-discrete - SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) { - return lhs.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU && - rhs.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU; - }); - // Prefer Nvidia over AMD, AMD over Intel, Intel over the rest. 
- SortPhysicalDevicesPerVendor(devices, dld, {0x10DE, 0x1002, 0x8086}); -} - -template -bool Proc(T& result, const InstanceDispatch& dld, const char* proc_name, - VkInstance instance = nullptr) noexcept { - result = reinterpret_cast(dld.vkGetInstanceProcAddr(instance, proc_name)); - return result != nullptr; -} - -template -void Proc(T& result, const DeviceDispatch& dld, const char* proc_name, VkDevice device) noexcept { - result = reinterpret_cast(dld.vkGetDeviceProcAddr(device, proc_name)); -} - -void Load(VkDevice device, DeviceDispatch& dld) noexcept { -#define X(name) Proc(dld.name, dld, #name, device) - X(vkAcquireNextImageKHR); - X(vkAllocateCommandBuffers); - X(vkAllocateDescriptorSets); - X(vkAllocateMemory); - X(vkBeginCommandBuffer); - X(vkBindBufferMemory); - X(vkBindImageMemory); - X(vkCmdBeginQuery); - X(vkCmdBeginRenderPass); - X(vkCmdBeginTransformFeedbackEXT); - X(vkCmdBeginDebugUtilsLabelEXT); - X(vkCmdBindDescriptorSets); - X(vkCmdBindIndexBuffer); - X(vkCmdBindPipeline); - X(vkCmdBindTransformFeedbackBuffersEXT); - X(vkCmdBindVertexBuffers); - X(vkCmdBlitImage); - X(vkCmdClearAttachments); - X(vkCmdCopyBuffer); - X(vkCmdCopyBufferToImage); - X(vkCmdCopyImage); - X(vkCmdCopyImageToBuffer); - X(vkCmdDispatch); - X(vkCmdDraw); - X(vkCmdDrawIndexed); - X(vkCmdEndQuery); - X(vkCmdEndRenderPass); - X(vkCmdEndTransformFeedbackEXT); - X(vkCmdEndDebugUtilsLabelEXT); - X(vkCmdFillBuffer); - X(vkCmdPipelineBarrier); - X(vkCmdPushConstants); - X(vkCmdSetBlendConstants); - X(vkCmdSetDepthBias); - X(vkCmdSetDepthBounds); - X(vkCmdSetEvent); - X(vkCmdSetScissor); - X(vkCmdSetStencilCompareMask); - X(vkCmdSetStencilReference); - X(vkCmdSetStencilWriteMask); - X(vkCmdSetViewport); - X(vkCmdWaitEvents); - X(vkCmdBindVertexBuffers2EXT); - X(vkCmdSetCullModeEXT); - X(vkCmdSetDepthBoundsTestEnableEXT); - X(vkCmdSetDepthCompareOpEXT); - X(vkCmdSetDepthTestEnableEXT); - X(vkCmdSetDepthWriteEnableEXT); - X(vkCmdSetFrontFaceEXT); - X(vkCmdSetPrimitiveTopologyEXT); - 
X(vkCmdSetStencilOpEXT); - X(vkCmdSetStencilTestEnableEXT); - X(vkCmdResolveImage); - X(vkCreateBuffer); - X(vkCreateBufferView); - X(vkCreateCommandPool); - X(vkCreateComputePipelines); - X(vkCreateDescriptorPool); - X(vkCreateDescriptorSetLayout); - X(vkCreateDescriptorUpdateTemplateKHR); - X(vkCreateEvent); - X(vkCreateFence); - X(vkCreateFramebuffer); - X(vkCreateGraphicsPipelines); - X(vkCreateImage); - X(vkCreateImageView); - X(vkCreatePipelineLayout); - X(vkCreateQueryPool); - X(vkCreateRenderPass); - X(vkCreateSampler); - X(vkCreateSemaphore); - X(vkCreateShaderModule); - X(vkCreateSwapchainKHR); - X(vkDestroyBuffer); - X(vkDestroyBufferView); - X(vkDestroyCommandPool); - X(vkDestroyDescriptorPool); - X(vkDestroyDescriptorSetLayout); - X(vkDestroyDescriptorUpdateTemplateKHR); - X(vkDestroyEvent); - X(vkDestroyFence); - X(vkDestroyFramebuffer); - X(vkDestroyImage); - X(vkDestroyImageView); - X(vkDestroyPipeline); - X(vkDestroyPipelineLayout); - X(vkDestroyQueryPool); - X(vkDestroyRenderPass); - X(vkDestroySampler); - X(vkDestroySemaphore); - X(vkDestroyShaderModule); - X(vkDestroySwapchainKHR); - X(vkDeviceWaitIdle); - X(vkEndCommandBuffer); - X(vkFreeCommandBuffers); - X(vkFreeDescriptorSets); - X(vkFreeMemory); - X(vkGetBufferMemoryRequirements); - X(vkGetDeviceQueue); - X(vkGetEventStatus); - X(vkGetFenceStatus); - X(vkGetImageMemoryRequirements); - X(vkGetQueryPoolResults); - X(vkGetSemaphoreCounterValueKHR); - X(vkMapMemory); - X(vkQueueSubmit); - X(vkResetFences); - X(vkResetQueryPoolEXT); - X(vkSetDebugUtilsObjectNameEXT); - X(vkSetDebugUtilsObjectTagEXT); - X(vkUnmapMemory); - X(vkUpdateDescriptorSetWithTemplateKHR); - X(vkUpdateDescriptorSets); - X(vkWaitForFences); - X(vkWaitSemaphoresKHR); -#undef X -} - -template -void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjectType type, - const char* name) { - const VkDebugUtilsObjectNameInfoEXT name_info{ - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, - .pNext = 
nullptr, - .objectType = VK_OBJECT_TYPE_IMAGE, - .objectHandle = reinterpret_cast(handle), - .pObjectName = name, - }; - Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info)); -} - -} // Anonymous namespace - -bool Load(InstanceDispatch& dld) noexcept { -#define X(name) Proc(dld.name, dld, #name) - return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) && - X(vkEnumerateInstanceLayerProperties); -#undef X -} - -bool Load(VkInstance instance, InstanceDispatch& dld) noexcept { -#define X(name) Proc(dld.name, dld, #name, instance) - // These functions may fail to load depending on the enabled extensions. - // Don't return a failure on these. - X(vkCreateDebugUtilsMessengerEXT); - X(vkDestroyDebugUtilsMessengerEXT); - X(vkDestroySurfaceKHR); - X(vkGetPhysicalDeviceFeatures2KHR); - X(vkGetPhysicalDeviceProperties2KHR); - X(vkGetPhysicalDeviceSurfaceCapabilitiesKHR); - X(vkGetPhysicalDeviceSurfaceFormatsKHR); - X(vkGetPhysicalDeviceSurfacePresentModesKHR); - X(vkGetPhysicalDeviceSurfaceSupportKHR); - X(vkGetSwapchainImagesKHR); - X(vkQueuePresentKHR); - - return X(vkCreateDevice) && X(vkDestroyDevice) && X(vkDestroyDevice) && - X(vkEnumerateDeviceExtensionProperties) && X(vkEnumeratePhysicalDevices) && - X(vkGetDeviceProcAddr) && X(vkGetPhysicalDeviceFormatProperties) && - X(vkGetPhysicalDeviceMemoryProperties) && X(vkGetPhysicalDeviceProperties) && - X(vkGetPhysicalDeviceQueueFamilyProperties); -#undef X -} - -const char* Exception::what() const noexcept { - return ToString(result); -} - -const char* ToString(VkResult result) noexcept { - switch (result) { - case VkResult::VK_SUCCESS: - return "VK_SUCCESS"; - case VkResult::VK_NOT_READY: - return "VK_NOT_READY"; - case VkResult::VK_TIMEOUT: - return "VK_TIMEOUT"; - case VkResult::VK_EVENT_SET: - return "VK_EVENT_SET"; - case VkResult::VK_EVENT_RESET: - return "VK_EVENT_RESET"; - case VkResult::VK_INCOMPLETE: - return "VK_INCOMPLETE"; - case VkResult::VK_ERROR_OUT_OF_HOST_MEMORY: - return 
"VK_ERROR_OUT_OF_HOST_MEMORY"; - case VkResult::VK_ERROR_OUT_OF_DEVICE_MEMORY: - return "VK_ERROR_OUT_OF_DEVICE_MEMORY"; - case VkResult::VK_ERROR_INITIALIZATION_FAILED: - return "VK_ERROR_INITIALIZATION_FAILED"; - case VkResult::VK_ERROR_DEVICE_LOST: - return "VK_ERROR_DEVICE_LOST"; - case VkResult::VK_ERROR_MEMORY_MAP_FAILED: - return "VK_ERROR_MEMORY_MAP_FAILED"; - case VkResult::VK_ERROR_LAYER_NOT_PRESENT: - return "VK_ERROR_LAYER_NOT_PRESENT"; - case VkResult::VK_ERROR_EXTENSION_NOT_PRESENT: - return "VK_ERROR_EXTENSION_NOT_PRESENT"; - case VkResult::VK_ERROR_FEATURE_NOT_PRESENT: - return "VK_ERROR_FEATURE_NOT_PRESENT"; - case VkResult::VK_ERROR_INCOMPATIBLE_DRIVER: - return "VK_ERROR_INCOMPATIBLE_DRIVER"; - case VkResult::VK_ERROR_TOO_MANY_OBJECTS: - return "VK_ERROR_TOO_MANY_OBJECTS"; - case VkResult::VK_ERROR_FORMAT_NOT_SUPPORTED: - return "VK_ERROR_FORMAT_NOT_SUPPORTED"; - case VkResult::VK_ERROR_FRAGMENTED_POOL: - return "VK_ERROR_FRAGMENTED_POOL"; - case VkResult::VK_ERROR_OUT_OF_POOL_MEMORY: - return "VK_ERROR_OUT_OF_POOL_MEMORY"; - case VkResult::VK_ERROR_INVALID_EXTERNAL_HANDLE: - return "VK_ERROR_INVALID_EXTERNAL_HANDLE"; - case VkResult::VK_ERROR_SURFACE_LOST_KHR: - return "VK_ERROR_SURFACE_LOST_KHR"; - case VkResult::VK_ERROR_NATIVE_WINDOW_IN_USE_KHR: - return "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR"; - case VkResult::VK_SUBOPTIMAL_KHR: - return "VK_SUBOPTIMAL_KHR"; - case VkResult::VK_ERROR_OUT_OF_DATE_KHR: - return "VK_ERROR_OUT_OF_DATE_KHR"; - case VkResult::VK_ERROR_INCOMPATIBLE_DISPLAY_KHR: - return "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR"; - case VkResult::VK_ERROR_VALIDATION_FAILED_EXT: - return "VK_ERROR_VALIDATION_FAILED_EXT"; - case VkResult::VK_ERROR_INVALID_SHADER_NV: - return "VK_ERROR_INVALID_SHADER_NV"; - case VkResult::VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT: - return "VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT"; - case VkResult::VK_ERROR_FRAGMENTATION_EXT: - return "VK_ERROR_FRAGMENTATION_EXT"; - case 
VkResult::VK_ERROR_NOT_PERMITTED_EXT: - return "VK_ERROR_NOT_PERMITTED_EXT"; - case VkResult::VK_ERROR_INVALID_DEVICE_ADDRESS_EXT: - return "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT"; - case VkResult::VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT: - return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; - case VkResult::VK_ERROR_UNKNOWN: - return "VK_ERROR_UNKNOWN"; - case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: - return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; - case VkResult::VK_THREAD_IDLE_KHR: - return "VK_THREAD_IDLE_KHR"; - case VkResult::VK_THREAD_DONE_KHR: - return "VK_THREAD_DONE_KHR"; - case VkResult::VK_OPERATION_DEFERRED_KHR: - return "VK_OPERATION_DEFERRED_KHR"; - case VkResult::VK_OPERATION_NOT_DEFERRED_KHR: - return "VK_OPERATION_NOT_DEFERRED_KHR"; - case VkResult::VK_PIPELINE_COMPILE_REQUIRED_EXT: - return "VK_PIPELINE_COMPILE_REQUIRED_EXT"; - case VkResult::VK_RESULT_MAX_ENUM: - return "VK_RESULT_MAX_ENUM"; - } - return "Unknown"; -} - -void Destroy(VkInstance instance, const InstanceDispatch& dld) noexcept { - dld.vkDestroyInstance(instance, nullptr); -} - -void Destroy(VkDevice device, const InstanceDispatch& dld) noexcept { - dld.vkDestroyDevice(device, nullptr); -} - -void Destroy(VkDevice device, VkBuffer handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyBuffer(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkBufferView handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyBufferView(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkCommandPool handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyCommandPool(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkDescriptorPool handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyDescriptorPool(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkDescriptorSetLayout handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyDescriptorSetLayout(device, handle, nullptr); -} - -void Destroy(VkDevice device, 
VkDescriptorUpdateTemplateKHR handle, - const DeviceDispatch& dld) noexcept { - dld.vkDestroyDescriptorUpdateTemplateKHR(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld) noexcept { - dld.vkFreeMemory(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyEvent(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyFence(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkFramebuffer handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyFramebuffer(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkImage handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyImage(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkImageView handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyImageView(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkPipeline handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyPipeline(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkPipelineLayout handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyPipelineLayout(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkQueryPool handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyQueryPool(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkRenderPass handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyRenderPass(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkSampler handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroySampler(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkSwapchainKHR handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroySwapchainKHR(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkSemaphore handle, const DeviceDispatch& dld) noexcept { - 
dld.vkDestroySemaphore(device, handle, nullptr); -} - -void Destroy(VkDevice device, VkShaderModule handle, const DeviceDispatch& dld) noexcept { - dld.vkDestroyShaderModule(device, handle, nullptr); -} - -void Destroy(VkInstance instance, VkDebugUtilsMessengerEXT handle, - const InstanceDispatch& dld) noexcept { - dld.vkDestroyDebugUtilsMessengerEXT(instance, handle, nullptr); -} - -void Destroy(VkInstance instance, VkSurfaceKHR handle, const InstanceDispatch& dld) noexcept { - dld.vkDestroySurfaceKHR(instance, handle, nullptr); -} - -VkResult Free(VkDevice device, VkDescriptorPool handle, Span sets, - const DeviceDispatch& dld) noexcept { - return dld.vkFreeDescriptorSets(device, handle, sets.size(), sets.data()); -} - -VkResult Free(VkDevice device, VkCommandPool handle, Span buffers, - const DeviceDispatch& dld) noexcept { - dld.vkFreeCommandBuffers(device, handle, buffers.size(), buffers.data()); - return VK_SUCCESS; -} - -Instance Instance::Create(u32 version, Span layers, Span extensions, - InstanceDispatch& dispatch) noexcept { - const VkApplicationInfo application_info{ - .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, - .pNext = nullptr, - .pApplicationName = "yuzu Emulator", - .applicationVersion = VK_MAKE_VERSION(0, 1, 0), - .pEngineName = "yuzu Emulator", - .engineVersion = VK_MAKE_VERSION(0, 1, 0), - .apiVersion = version, - }; - const VkInstanceCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .pApplicationInfo = &application_info, - .enabledLayerCount = layers.size(), - .ppEnabledLayerNames = layers.data(), - .enabledExtensionCount = extensions.size(), - .ppEnabledExtensionNames = extensions.data(), - }; - - VkInstance instance; - if (dispatch.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { - // Failed to create the instance. 
- return {}; - } - if (!Proc(dispatch.vkDestroyInstance, dispatch, "vkDestroyInstance", instance)) { - // We successfully created an instance but the destroy function couldn't be loaded. - // This is a good moment to panic. - return {}; - } - - return Instance(instance, dispatch); -} - -std::optional> Instance::EnumeratePhysicalDevices() { - u32 num; - if (dld->vkEnumeratePhysicalDevices(handle, &num, nullptr) != VK_SUCCESS) { - return std::nullopt; - } - std::vector physical_devices(num); - if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) { - return std::nullopt; - } - SortPhysicalDevices(physical_devices, *dld); - return std::make_optional(std::move(physical_devices)); -} - -DebugCallback Instance::TryCreateDebugCallback( - PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept { - const VkDebugUtilsMessengerCreateInfoEXT ci{ - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, - .pNext = nullptr, - .flags = 0, - .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, - .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT, - .pfnUserCallback = callback, - .pUserData = nullptr, - }; - - VkDebugUtilsMessengerEXT messenger; - if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) { - return {}; - } - return DebugCallback(messenger, handle, *dld); -} - -void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { - Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); -} - -void Buffer::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name); -} - -void BufferView::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name); -} - -void 
Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { - Check(dld->vkBindImageMemory(owner, handle, memory, offset)); -} - -void Image::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name); -} - -void ImageView::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name); -} - -void DeviceMemory::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name); -} - -void Fence::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FENCE, name); -} - -void Framebuffer::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FRAMEBUFFER, name); -} - -DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const { - const std::size_t num = ai.descriptorSetCount; - std::unique_ptr sets = std::make_unique(num); - switch (const VkResult result = dld->vkAllocateDescriptorSets(owner, &ai, sets.get())) { - case VK_SUCCESS: - return DescriptorSets(std::move(sets), num, owner, handle, *dld); - case VK_ERROR_OUT_OF_POOL_MEMORY: - return {}; - default: - throw Exception(result); - } -} - -void DescriptorPool::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DESCRIPTOR_POOL, name); -} - -CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { - const VkCommandBufferAllocateInfo ai{ - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .pNext = nullptr, - .commandPool = handle, - .level = level, - .commandBufferCount = static_cast(num_buffers), - }; - - std::unique_ptr buffers = std::make_unique(num_buffers); - switch (const VkResult result = dld->vkAllocateCommandBuffers(owner, &ai, buffers.get())) { - case VK_SUCCESS: - return CommandBuffers(std::move(buffers), num_buffers, owner, handle, *dld); - case 
VK_ERROR_OUT_OF_POOL_MEMORY: - return {}; - default: - throw Exception(result); - } -} - -void CommandPool::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_COMMAND_POOL, name); -} - -std::vector SwapchainKHR::GetImages() const { - u32 num; - Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); - std::vector images(num); - Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, images.data())); - return images; -} - -void Event::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_EVENT, name); -} - -void ShaderModule::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name); -} - -void Semaphore::SetObjectNameEXT(const char* name) const { - SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name); -} - -Device Device::Create(VkPhysicalDevice physical_device, Span queues_ci, - Span enabled_extensions, const void* next, - DeviceDispatch& dispatch) noexcept { - const VkDeviceCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, - .pNext = next, - .flags = 0, - .queueCreateInfoCount = queues_ci.size(), - .pQueueCreateInfos = queues_ci.data(), - .enabledLayerCount = 0, - .ppEnabledLayerNames = nullptr, - .enabledExtensionCount = enabled_extensions.size(), - .ppEnabledExtensionNames = enabled_extensions.data(), - .pEnabledFeatures = nullptr, - }; - - VkDevice device; - if (dispatch.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { - return {}; - } - Load(device, dispatch); - return Device(device, dispatch); -} - -Queue Device::GetQueue(u32 family_index) const noexcept { - VkQueue queue; - dld->vkGetDeviceQueue(handle, family_index, 0, &queue); - return Queue(queue, *dld); -} - -Buffer Device::CreateBuffer(const VkBufferCreateInfo& ci) const { - VkBuffer object; - Check(dld->vkCreateBuffer(handle, &ci, nullptr, &object)); - return Buffer(object, handle, *dld); -} - 
-BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const { - VkBufferView object; - Check(dld->vkCreateBufferView(handle, &ci, nullptr, &object)); - return BufferView(object, handle, *dld); -} - -Image Device::CreateImage(const VkImageCreateInfo& ci) const { - VkImage object; - Check(dld->vkCreateImage(handle, &ci, nullptr, &object)); - return Image(object, handle, *dld); -} - -ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const { - VkImageView object; - Check(dld->vkCreateImageView(handle, &ci, nullptr, &object)); - return ImageView(object, handle, *dld); -} - -Semaphore Device::CreateSemaphore() const { - static constexpr VkSemaphoreCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - }; - return CreateSemaphore(ci); -} - -Semaphore Device::CreateSemaphore(const VkSemaphoreCreateInfo& ci) const { - VkSemaphore object; - Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object)); - return Semaphore(object, handle, *dld); -} - -Fence Device::CreateFence(const VkFenceCreateInfo& ci) const { - VkFence object; - Check(dld->vkCreateFence(handle, &ci, nullptr, &object)); - return Fence(object, handle, *dld); -} - -DescriptorPool Device::CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const { - VkDescriptorPool object; - Check(dld->vkCreateDescriptorPool(handle, &ci, nullptr, &object)); - return DescriptorPool(object, handle, *dld); -} - -RenderPass Device::CreateRenderPass(const VkRenderPassCreateInfo& ci) const { - VkRenderPass object; - Check(dld->vkCreateRenderPass(handle, &ci, nullptr, &object)); - return RenderPass(object, handle, *dld); -} - -DescriptorSetLayout Device::CreateDescriptorSetLayout( - const VkDescriptorSetLayoutCreateInfo& ci) const { - VkDescriptorSetLayout object; - Check(dld->vkCreateDescriptorSetLayout(handle, &ci, nullptr, &object)); - return DescriptorSetLayout(object, handle, *dld); -} - -PipelineLayout Device::CreatePipelineLayout(const 
VkPipelineLayoutCreateInfo& ci) const { - VkPipelineLayout object; - Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object)); - return PipelineLayout(object, handle, *dld); -} - -Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const { - VkPipeline object; - Check(dld->vkCreateGraphicsPipelines(handle, nullptr, 1, &ci, nullptr, &object)); - return Pipeline(object, handle, *dld); -} - -Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const { - VkPipeline object; - Check(dld->vkCreateComputePipelines(handle, nullptr, 1, &ci, nullptr, &object)); - return Pipeline(object, handle, *dld); -} - -Sampler Device::CreateSampler(const VkSamplerCreateInfo& ci) const { - VkSampler object; - Check(dld->vkCreateSampler(handle, &ci, nullptr, &object)); - return Sampler(object, handle, *dld); -} - -Framebuffer Device::CreateFramebuffer(const VkFramebufferCreateInfo& ci) const { - VkFramebuffer object; - Check(dld->vkCreateFramebuffer(handle, &ci, nullptr, &object)); - return Framebuffer(object, handle, *dld); -} - -CommandPool Device::CreateCommandPool(const VkCommandPoolCreateInfo& ci) const { - VkCommandPool object; - Check(dld->vkCreateCommandPool(handle, &ci, nullptr, &object)); - return CommandPool(object, handle, *dld); -} - -DescriptorUpdateTemplateKHR Device::CreateDescriptorUpdateTemplateKHR( - const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const { - VkDescriptorUpdateTemplateKHR object; - Check(dld->vkCreateDescriptorUpdateTemplateKHR(handle, &ci, nullptr, &object)); - return DescriptorUpdateTemplateKHR(object, handle, *dld); -} - -QueryPool Device::CreateQueryPool(const VkQueryPoolCreateInfo& ci) const { - VkQueryPool object; - Check(dld->vkCreateQueryPool(handle, &ci, nullptr, &object)); - return QueryPool(object, handle, *dld); -} - -ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) const { - VkShaderModule object; - Check(dld->vkCreateShaderModule(handle, &ci, nullptr, 
&object)); - return ShaderModule(object, handle, *dld); -} - -Event Device::CreateEvent() const { - static constexpr VkEventCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - }; - - VkEvent object; - Check(dld->vkCreateEvent(handle, &ci, nullptr, &object)); - return Event(object, handle, *dld); -} - -SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const { - VkSwapchainKHR object; - Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object)); - return SwapchainKHR(object, handle, *dld); -} - -DeviceMemory Device::TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept { - VkDeviceMemory memory; - if (dld->vkAllocateMemory(handle, &ai, nullptr, &memory) != VK_SUCCESS) { - return {}; - } - return DeviceMemory(memory, handle, *dld); -} - -DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const { - VkDeviceMemory memory; - Check(dld->vkAllocateMemory(handle, &ai, nullptr, &memory)); - return DeviceMemory(memory, handle, *dld); -} - -VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept { - VkMemoryRequirements requirements; - dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements); - return requirements; -} - -VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { - VkMemoryRequirements requirements; - dld->vkGetImageMemoryRequirements(handle, image, &requirements); - return requirements; -} - -void Device::UpdateDescriptorSets(Span writes, - Span copies) const noexcept { - dld->vkUpdateDescriptorSets(handle, writes.size(), writes.data(), copies.size(), copies.data()); -} - -VkPhysicalDeviceProperties PhysicalDevice::GetProperties() const noexcept { - VkPhysicalDeviceProperties properties; - dld->vkGetPhysicalDeviceProperties(physical_device, &properties); - return properties; -} - -void PhysicalDevice::GetProperties2KHR(VkPhysicalDeviceProperties2KHR& properties) const noexcept { - 
dld->vkGetPhysicalDeviceProperties2KHR(physical_device, &properties); -} - -VkPhysicalDeviceFeatures PhysicalDevice::GetFeatures() const noexcept { - VkPhysicalDeviceFeatures2KHR features2; - features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; - features2.pNext = nullptr; - dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features2); - return features2.features; -} - -void PhysicalDevice::GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR& features) const noexcept { - dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features); -} - -VkFormatProperties PhysicalDevice::GetFormatProperties(VkFormat format) const noexcept { - VkFormatProperties properties; - dld->vkGetPhysicalDeviceFormatProperties(physical_device, format, &properties); - return properties; -} - -std::vector PhysicalDevice::EnumerateDeviceExtensionProperties() const { - u32 num; - dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, nullptr); - std::vector properties(num); - dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, properties.data()); - return properties; -} - -std::vector PhysicalDevice::GetQueueFamilyProperties() const { - u32 num; - dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &num, nullptr); - std::vector properties(num); - dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &num, properties.data()); - return properties; -} - -bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR surface) const { - VkBool32 supported; - Check(dld->vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, queue_family_index, surface, - &supported)); - return supported == VK_TRUE; -} - -VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const { - VkSurfaceCapabilitiesKHR capabilities; - Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities)); - return capabilities; -} - -std::vector 
PhysicalDevice::GetSurfaceFormatsKHR(VkSurfaceKHR surface) const { - u32 num; - Check(dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &num, nullptr)); - std::vector formats(num); - Check( - dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &num, formats.data())); - return formats; -} - -std::vector PhysicalDevice::GetSurfacePresentModesKHR( - VkSurfaceKHR surface) const { - u32 num; - Check(dld->vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &num, nullptr)); - std::vector modes(num); - Check(dld->vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &num, - modes.data())); - return modes; -} - -VkPhysicalDeviceMemoryProperties PhysicalDevice::GetMemoryProperties() const noexcept { - VkPhysicalDeviceMemoryProperties properties; - dld->vkGetPhysicalDeviceMemoryProperties(physical_device, &properties); - return properties; -} - -u32 AvailableVersion(const InstanceDispatch& dld) noexcept { - PFN_vkEnumerateInstanceVersion vkEnumerateInstanceVersion; - if (!Proc(vkEnumerateInstanceVersion, dld, "vkEnumerateInstanceVersion")) { - // If the procedure is not found, Vulkan 1.0 is assumed - return VK_API_VERSION_1_0; - } - u32 version; - if (const VkResult result = vkEnumerateInstanceVersion(&version); result != VK_SUCCESS) { - LOG_ERROR(Render_Vulkan, "vkEnumerateInstanceVersion returned {}, assuming Vulkan 1.1", - ToString(result)); - return VK_API_VERSION_1_1; - } - return version; -} - -std::optional> EnumerateInstanceExtensionProperties( - const InstanceDispatch& dld) { - u32 num; - if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &num, nullptr) != VK_SUCCESS) { - return std::nullopt; - } - std::vector properties(num); - if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &num, properties.data()) != - VK_SUCCESS) { - return std::nullopt; - } - return properties; -} - -std::optional> EnumerateInstanceLayerProperties( - const InstanceDispatch& dld) { - u32 num; - if 
(dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) { - return std::nullopt; - } - std::vector properties(num); - if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) { - return std::nullopt; - } - return properties; -} - -} // namespace Vulkan::vk diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h deleted file mode 100644 index f9a184e00..000000000 --- a/src/video_core/renderer_vulkan/wrapper.h +++ /dev/null @@ -1,1213 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define VK_NO_PROTOTYPES -#include - -#include "common/common_types.h" - -#ifdef _MSC_VER -#pragma warning(disable : 26812) // Disable prefer enum class over enum -#endif - -namespace Vulkan::vk { - -/** - * Span for Vulkan arrays. - * Based on std::span but optimized for array access instead of iterators. - * Size returns uint32_t instead of size_t to ease interaction with Vulkan functions. - */ -template -class Span { -public: - using value_type = T; - using size_type = u32; - using difference_type = std::ptrdiff_t; - using reference = const T&; - using const_reference = const T&; - using pointer = const T*; - using const_pointer = const T*; - using iterator = const T*; - using const_iterator = const T*; - - /// Construct an empty span. - constexpr Span() noexcept = default; - - /// Construct an empty span - constexpr Span(std::nullptr_t) noexcept {} - - /// Construct a span from a single element. - constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {} - - /// Construct a span from a range. 
- template - // requires std::data(const Range&) - // requires std::size(const Range&) - constexpr Span(const Range& range) : ptr{std::data(range)}, num{std::size(range)} {} - - /// Construct a span from a pointer and a size. - /// This is inteded for subranges. - constexpr Span(const T* ptr_, std::size_t num_) noexcept : ptr{ptr_}, num{num_} {} - - /// Returns the data pointer by the span. - constexpr const T* data() const noexcept { - return ptr; - } - - /// Returns the number of elements in the span. - /// @note Returns a 32 bits integer because most Vulkan functions expect this type. - constexpr u32 size() const noexcept { - return static_cast(num); - } - - /// Returns true when the span is empty. - constexpr bool empty() const noexcept { - return num == 0; - } - - /// Returns a reference to the element in the passed index. - /// @pre: index < size() - constexpr const T& operator[](std::size_t index) const noexcept { - return ptr[index]; - } - - /// Returns an iterator to the beginning of the span. - constexpr const T* begin() const noexcept { - return ptr; - } - - /// Returns an iterator to the end of the span. - constexpr const T* end() const noexcept { - return ptr + num; - } - - /// Returns an iterator to the beginning of the span. - constexpr const T* cbegin() const noexcept { - return ptr; - } - - /// Returns an iterator to the end of the span. - constexpr const T* cend() const noexcept { - return ptr + num; - } - -private: - const T* ptr = nullptr; - std::size_t num = 0; -}; - -/// Vulkan exception generated from a VkResult. -class Exception final : public std::exception { -public: - /// Construct the exception with a result. 
- /// @pre result != VK_SUCCESS - explicit Exception(VkResult result_) : result{result_} {} - virtual ~Exception() = default; - - const char* what() const noexcept override; - -private: - VkResult result; -}; - -/// Converts a VkResult enum into a rodata string -const char* ToString(VkResult) noexcept; - -/// Throws a Vulkan exception if result is not success. -inline void Check(VkResult result) { - if (result != VK_SUCCESS) { - throw Exception(result); - } -} - -/// Throws a Vulkan exception if result is an error. -/// @return result -inline VkResult Filter(VkResult result) { - if (result < 0) { - throw Exception(result); - } - return result; -} - -/// Table holding Vulkan instance function pointers. -struct InstanceDispatch { - PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; - - PFN_vkCreateInstance vkCreateInstance; - PFN_vkDestroyInstance vkDestroyInstance; - PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties; - PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties; - - PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT; - PFN_vkCreateDevice vkCreateDevice; - PFN_vkDestroyDebugUtilsMessengerEXT vkDestroyDebugUtilsMessengerEXT; - PFN_vkDestroyDevice vkDestroyDevice; - PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR; - PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties; - PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices; - PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr; - PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR; - PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties; - PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties; - PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties; - PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR; - PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties; - PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR 
vkGetPhysicalDeviceSurfaceCapabilitiesKHR; - PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR; - PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR; - PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR; - PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; - PFN_vkQueuePresentKHR vkQueuePresentKHR; -}; - -/// Table holding Vulkan device function pointers. -struct DeviceDispatch : public InstanceDispatch { - PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; - PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers; - PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets; - PFN_vkAllocateMemory vkAllocateMemory; - PFN_vkBeginCommandBuffer vkBeginCommandBuffer; - PFN_vkBindBufferMemory vkBindBufferMemory; - PFN_vkBindImageMemory vkBindImageMemory; - PFN_vkCmdBeginQuery vkCmdBeginQuery; - PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; - PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; - PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT; - PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; - PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; - PFN_vkCmdBindPipeline vkCmdBindPipeline; - PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT; - PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers; - PFN_vkCmdBlitImage vkCmdBlitImage; - PFN_vkCmdClearAttachments vkCmdClearAttachments; - PFN_vkCmdCopyBuffer vkCmdCopyBuffer; - PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage; - PFN_vkCmdCopyImage vkCmdCopyImage; - PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer; - PFN_vkCmdDispatch vkCmdDispatch; - PFN_vkCmdDraw vkCmdDraw; - PFN_vkCmdDrawIndexed vkCmdDrawIndexed; - PFN_vkCmdEndQuery vkCmdEndQuery; - PFN_vkCmdEndRenderPass vkCmdEndRenderPass; - PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT; - PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT; - PFN_vkCmdFillBuffer vkCmdFillBuffer; - PFN_vkCmdPipelineBarrier 
vkCmdPipelineBarrier; - PFN_vkCmdPushConstants vkCmdPushConstants; - PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants; - PFN_vkCmdSetDepthBias vkCmdSetDepthBias; - PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds; - PFN_vkCmdSetEvent vkCmdSetEvent; - PFN_vkCmdSetScissor vkCmdSetScissor; - PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask; - PFN_vkCmdSetStencilReference vkCmdSetStencilReference; - PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask; - PFN_vkCmdSetViewport vkCmdSetViewport; - PFN_vkCmdWaitEvents vkCmdWaitEvents; - PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT; - PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT; - PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT; - PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT; - PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT; - PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT; - PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT; - PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; - PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; - PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; - PFN_vkCmdResolveImage vkCmdResolveImage; - PFN_vkCreateBuffer vkCreateBuffer; - PFN_vkCreateBufferView vkCreateBufferView; - PFN_vkCreateCommandPool vkCreateCommandPool; - PFN_vkCreateComputePipelines vkCreateComputePipelines; - PFN_vkCreateDescriptorPool vkCreateDescriptorPool; - PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout; - PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; - PFN_vkCreateEvent vkCreateEvent; - PFN_vkCreateFence vkCreateFence; - PFN_vkCreateFramebuffer vkCreateFramebuffer; - PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines; - PFN_vkCreateImage vkCreateImage; - PFN_vkCreateImageView vkCreateImageView; - PFN_vkCreatePipelineLayout vkCreatePipelineLayout; - PFN_vkCreateQueryPool vkCreateQueryPool; - PFN_vkCreateRenderPass vkCreateRenderPass; - PFN_vkCreateSampler vkCreateSampler; - 
PFN_vkCreateSemaphore vkCreateSemaphore; - PFN_vkCreateShaderModule vkCreateShaderModule; - PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR; - PFN_vkDestroyBuffer vkDestroyBuffer; - PFN_vkDestroyBufferView vkDestroyBufferView; - PFN_vkDestroyCommandPool vkDestroyCommandPool; - PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool; - PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout; - PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; - PFN_vkDestroyEvent vkDestroyEvent; - PFN_vkDestroyFence vkDestroyFence; - PFN_vkDestroyFramebuffer vkDestroyFramebuffer; - PFN_vkDestroyImage vkDestroyImage; - PFN_vkDestroyImageView vkDestroyImageView; - PFN_vkDestroyPipeline vkDestroyPipeline; - PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout; - PFN_vkDestroyQueryPool vkDestroyQueryPool; - PFN_vkDestroyRenderPass vkDestroyRenderPass; - PFN_vkDestroySampler vkDestroySampler; - PFN_vkDestroySemaphore vkDestroySemaphore; - PFN_vkDestroyShaderModule vkDestroyShaderModule; - PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; - PFN_vkDeviceWaitIdle vkDeviceWaitIdle; - PFN_vkEndCommandBuffer vkEndCommandBuffer; - PFN_vkFreeCommandBuffers vkFreeCommandBuffers; - PFN_vkFreeDescriptorSets vkFreeDescriptorSets; - PFN_vkFreeMemory vkFreeMemory; - PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; - PFN_vkGetDeviceQueue vkGetDeviceQueue; - PFN_vkGetEventStatus vkGetEventStatus; - PFN_vkGetFenceStatus vkGetFenceStatus; - PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; - PFN_vkGetQueryPoolResults vkGetQueryPoolResults; - PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR; - PFN_vkMapMemory vkMapMemory; - PFN_vkQueueSubmit vkQueueSubmit; - PFN_vkResetFences vkResetFences; - PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; - PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT; - PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT; - PFN_vkUnmapMemory vkUnmapMemory; - 
PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; - PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; - PFN_vkWaitForFences vkWaitForFences; - PFN_vkWaitSemaphoresKHR vkWaitSemaphoresKHR; -}; - -/// Loads instance agnostic function pointers. -/// @return True on success, false on error. -bool Load(InstanceDispatch&) noexcept; - -/// Loads instance function pointers. -/// @return True on success, false on error. -bool Load(VkInstance, InstanceDispatch&) noexcept; - -void Destroy(VkInstance, const InstanceDispatch&) noexcept; -void Destroy(VkDevice, const InstanceDispatch&) noexcept; - -void Destroy(VkDevice, VkBuffer, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkBufferView, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkCommandPool, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkEvent, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkImageView, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkPipeline, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkPipelineLayout, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkQueryPool, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkRenderPass, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkSampler, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkSwapchainKHR, const DeviceDispatch&) noexcept; -void Destroy(VkDevice, VkSemaphore, const DeviceDispatch&) noexcept; -void 
Destroy(VkDevice, VkShaderModule, const DeviceDispatch&) noexcept; -void Destroy(VkInstance, VkDebugUtilsMessengerEXT, const InstanceDispatch&) noexcept; -void Destroy(VkInstance, VkSurfaceKHR, const InstanceDispatch&) noexcept; - -VkResult Free(VkDevice, VkDescriptorPool, Span, const DeviceDispatch&) noexcept; -VkResult Free(VkDevice, VkCommandPool, Span, const DeviceDispatch&) noexcept; - -template -class Handle; - -/// Handle with an owning type. -/// Analogue to std::unique_ptr. -template -class Handle { -public: - /// Construct a handle and hold it's ownership. - explicit Handle(Type handle_, OwnerType owner_, const Dispatch& dld_) noexcept - : handle{handle_}, owner{owner_}, dld{&dld_} {} - - /// Construct an empty handle. - Handle() = default; - - /// Copying Vulkan objects is not supported and will never be. - Handle(const Handle&) = delete; - Handle& operator=(const Handle&) = delete; - - /// Construct a handle transfering the ownership from another handle. - Handle(Handle&& rhs) noexcept - : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, dld{rhs.dld} {} - - /// Assign the current handle transfering the ownership from another handle. - /// Destroys any previously held object. - Handle& operator=(Handle&& rhs) noexcept { - Release(); - handle = std::exchange(rhs.handle, nullptr); - owner = rhs.owner; - dld = rhs.dld; - return *this; - } - - /// Destroys the current handle if it existed. - ~Handle() noexcept { - Release(); - } - - /// Destroys any held object. - void reset() noexcept { - Release(); - handle = nullptr; - } - - /// Returns the address of the held object. - /// Intended for Vulkan structures that expect a pointer to an array. - const Type* address() const noexcept { - return std::addressof(handle); - } - - /// Returns the held Vulkan handle. - Type operator*() const noexcept { - return handle; - } - - /// Returns true when there's a held object. 
- explicit operator bool() const noexcept { - return handle != nullptr; - } - -protected: - Type handle = nullptr; - OwnerType owner = nullptr; - const Dispatch* dld = nullptr; - -private: - /// Destroys the held object if it exists. - void Release() noexcept { - if (handle) { - Destroy(owner, handle, *dld); - } - } -}; - -/// Dummy type used to specify a handle has no owner. -struct NoOwner {}; - -/// Handle without an owning type. -/// Analogue to std::unique_ptr -template -class Handle { -public: - /// Construct a handle and hold it's ownership. - explicit Handle(Type handle_, const Dispatch& dld_) noexcept : handle{handle_}, dld{&dld_} {} - - /// Construct an empty handle. - Handle() noexcept = default; - - /// Copying Vulkan objects is not supported and will never be. - Handle(const Handle&) = delete; - Handle& operator=(const Handle&) = delete; - - /// Construct a handle transfering ownership from another handle. - Handle(Handle&& rhs) noexcept : handle{std::exchange(rhs.handle, nullptr)}, dld{rhs.dld} {} - - /// Assign the current handle transfering the ownership from another handle. - /// Destroys any previously held object. - Handle& operator=(Handle&& rhs) noexcept { - Release(); - handle = std::exchange(rhs.handle, nullptr); - dld = rhs.dld; - return *this; - } - - /// Destroys the current handle if it existed. - ~Handle() noexcept { - Release(); - } - - /// Destroys any held object. - void reset() noexcept { - Release(); - handle = nullptr; - } - - /// Returns the address of the held object. - /// Intended for Vulkan structures that expect a pointer to an array. - const Type* address() const noexcept { - return std::addressof(handle); - } - - /// Returns the held Vulkan handle. - Type operator*() const noexcept { - return handle; - } - - /// Returns true when there's a held object. 
- operator bool() const noexcept { - return handle != nullptr; - } - -protected: - Type handle = nullptr; - const Dispatch* dld = nullptr; - -private: - /// Destroys the held object if it exists. - void Release() noexcept { - if (handle) { - Destroy(handle, *dld); - } - } -}; - -/// Array of a pool allocation. -/// Analogue to std::vector -template -class PoolAllocations { -public: - /// Construct an empty allocation. - PoolAllocations() = default; - - /// Construct an allocation. Errors are reported through IsOutOfPoolMemory(). - explicit PoolAllocations(std::unique_ptr allocations_, std::size_t num_, - VkDevice device_, PoolType pool_, const DeviceDispatch& dld_) noexcept - : allocations{std::move(allocations_)}, num{num_}, device{device_}, pool{pool_}, - dld{&dld_} {} - - /// Copying Vulkan allocations is not supported and will never be. - PoolAllocations(const PoolAllocations&) = delete; - PoolAllocations& operator=(const PoolAllocations&) = delete; - - /// Construct an allocation transfering ownership from another allocation. - PoolAllocations(PoolAllocations&& rhs) noexcept - : allocations{std::move(rhs.allocations)}, num{rhs.num}, device{rhs.device}, pool{rhs.pool}, - dld{rhs.dld} {} - - /// Assign an allocation transfering ownership from another allocation. - /// Releases any previously held allocation. - PoolAllocations& operator=(PoolAllocations&& rhs) noexcept { - Release(); - allocations = std::move(rhs.allocations); - num = rhs.num; - device = rhs.device; - pool = rhs.pool; - dld = rhs.dld; - return *this; - } - - /// Destroys any held allocation. - ~PoolAllocations() { - Release(); - } - - /// Returns the number of allocations. - std::size_t size() const noexcept { - return num; - } - - /// Returns a pointer to the array of allocations. - AllocationType const* data() const noexcept { - return allocations.get(); - } - - /// Returns the allocation in the specified index. 
- /// @pre index < size() - AllocationType operator[](std::size_t index) const noexcept { - return allocations[index]; - } - - /// True when a pool fails to construct. - bool IsOutOfPoolMemory() const noexcept { - return !device; - } - -private: - /// Destroys the held allocations if they exist. - void Release() noexcept { - if (!allocations) { - return; - } - const Span span(allocations.get(), num); - const VkResult result = Free(device, pool, span, *dld); - // There's no way to report errors from a destructor. - if (result != VK_SUCCESS) { - std::terminate(); - } - } - - std::unique_ptr allocations; - std::size_t num = 0; - VkDevice device = nullptr; - PoolType pool = nullptr; - const DeviceDispatch* dld = nullptr; -}; - -using DebugCallback = Handle; -using DescriptorSetLayout = Handle; -using DescriptorUpdateTemplateKHR = Handle; -using Pipeline = Handle; -using PipelineLayout = Handle; -using QueryPool = Handle; -using RenderPass = Handle; -using Sampler = Handle; -using SurfaceKHR = Handle; - -using DescriptorSets = PoolAllocations; -using CommandBuffers = PoolAllocations; - -/// Vulkan instance owning handle. -class Instance : public Handle { - using Handle::Handle; - -public: - /// Creates a Vulkan instance. Use "operator bool" for error handling. - static Instance Create(u32 version, Span layers, Span extensions, - InstanceDispatch& dispatch) noexcept; - - /// Enumerates physical devices. - /// @return Physical devices and an empty handle on failure. - std::optional> EnumeratePhysicalDevices(); - - /// Tries to create a debug callback messenger. Returns an empty handle on failure. - DebugCallback TryCreateDebugCallback(PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept; -}; - -class Queue { -public: - /// Construct an empty queue handle. - constexpr Queue() noexcept = default; - - /// Construct a queue handle. 
- constexpr Queue(VkQueue queue_, const DeviceDispatch& dld_) noexcept - : queue{queue_}, dld{&dld_} {} - - VkResult Submit(Span submit_infos, - VkFence fence = VK_NULL_HANDLE) const noexcept { - return dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence); - } - - VkResult Present(const VkPresentInfoKHR& present_info) const noexcept { - return dld->vkQueuePresentKHR(queue, &present_info); - } - -private: - VkQueue queue = nullptr; - const DeviceDispatch* dld = nullptr; -}; - -class Buffer : public Handle { - using Handle::Handle; - -public: - /// Attaches a memory allocation. - void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; - - /// Set object name. - void SetObjectNameEXT(const char* name) const; -}; - -class BufferView : public Handle { - using Handle::Handle; - -public: - /// Set object name. - void SetObjectNameEXT(const char* name) const; -}; - -class Image : public Handle { - using Handle::Handle; - -public: - /// Attaches a memory allocation. - void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; - - /// Set object name. - void SetObjectNameEXT(const char* name) const; -}; - -class ImageView : public Handle { - using Handle::Handle; - -public: - /// Set object name. - void SetObjectNameEXT(const char* name) const; -}; - -class DeviceMemory : public Handle { - using Handle::Handle; - -public: - /// Set object name. - void SetObjectNameEXT(const char* name) const; - - u8* Map(VkDeviceSize offset, VkDeviceSize size) const { - void* data; - Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); - return static_cast(data); - } - - void Unmap() const noexcept { - dld->vkUnmapMemory(owner, handle); - } -}; - -class Fence : public Handle { - using Handle::Handle; - -public: - /// Set object name. 
- void SetObjectNameEXT(const char* name) const; - - VkResult Wait(u64 timeout = std::numeric_limits::max()) const noexcept { - return dld->vkWaitForFences(owner, 1, &handle, true, timeout); - } - - VkResult GetStatus() const noexcept { - return dld->vkGetFenceStatus(owner, handle); - } - - void Reset() const { - Check(dld->vkResetFences(owner, 1, &handle)); - } -}; - -class Framebuffer : public Handle { - using Handle::Handle; - -public: - /// Set object name. - void SetObjectNameEXT(const char* name) const; -}; - -class DescriptorPool : public Handle { - using Handle::Handle; - -public: - DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; - - /// Set object name. - void SetObjectNameEXT(const char* name) const; -}; - -class CommandPool : public Handle { - using Handle::Handle; - -public: - CommandBuffers Allocate(std::size_t num_buffers, - VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; - - /// Set object name. - void SetObjectNameEXT(const char* name) const; -}; - -class SwapchainKHR : public Handle { - using Handle::Handle; - -public: - std::vector GetImages() const; -}; - -class Event : public Handle { - using Handle::Handle; - -public: - /// Set object name. - void SetObjectNameEXT(const char* name) const; - - VkResult GetStatus() const noexcept { - return dld->vkGetEventStatus(owner, handle); - } -}; - -class ShaderModule : public Handle { - using Handle::Handle; - -public: - /// Set object name. - void SetObjectNameEXT(const char* name) const; -}; - -class Semaphore : public Handle { - using Handle::Handle; - -public: - /// Set object name. - void SetObjectNameEXT(const char* name) const; - - [[nodiscard]] u64 GetCounter() const { - u64 value; - Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); - return value; - } - - /** - * Waits for a timeline semaphore on the host. 
- * - * @param value Value to wait - * @param timeout Time in nanoseconds to timeout - * @return True on successful wait, false on timeout - */ - bool Wait(u64 value, u64 timeout = std::numeric_limits::max()) const { - const VkSemaphoreWaitInfoKHR wait_info{ - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .semaphoreCount = 1, - .pSemaphores = &handle, - .pValues = &value, - }; - const VkResult result = dld->vkWaitSemaphoresKHR(owner, &wait_info, timeout); - switch (result) { - case VK_SUCCESS: - return true; - case VK_TIMEOUT: - return false; - default: - throw Exception(result); - } - } -}; - -class Device : public Handle { - using Handle::Handle; - -public: - static Device Create(VkPhysicalDevice physical_device, Span queues_ci, - Span enabled_extensions, const void* next, - DeviceDispatch& dispatch) noexcept; - - Queue GetQueue(u32 family_index) const noexcept; - - Buffer CreateBuffer(const VkBufferCreateInfo& ci) const; - - BufferView CreateBufferView(const VkBufferViewCreateInfo& ci) const; - - Image CreateImage(const VkImageCreateInfo& ci) const; - - ImageView CreateImageView(const VkImageViewCreateInfo& ci) const; - - Semaphore CreateSemaphore() const; - - Semaphore CreateSemaphore(const VkSemaphoreCreateInfo& ci) const; - - Fence CreateFence(const VkFenceCreateInfo& ci) const; - - DescriptorPool CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const; - - RenderPass CreateRenderPass(const VkRenderPassCreateInfo& ci) const; - - DescriptorSetLayout CreateDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo& ci) const; - - PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const; - - Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const; - - Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const; - - Sampler CreateSampler(const VkSamplerCreateInfo& ci) const; - - Framebuffer CreateFramebuffer(const VkFramebufferCreateInfo& ci) const; - - 
CommandPool CreateCommandPool(const VkCommandPoolCreateInfo& ci) const; - - DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplateKHR( - const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const; - - QueryPool CreateQueryPool(const VkQueryPoolCreateInfo& ci) const; - - ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; - - Event CreateEvent() const; - - SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; - - DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept; - - DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; - - VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept; - - VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; - - void UpdateDescriptorSets(Span writes, - Span copies) const noexcept; - - void UpdateDescriptorSet(VkDescriptorSet set, VkDescriptorUpdateTemplateKHR update_template, - const void* data) const noexcept { - dld->vkUpdateDescriptorSetWithTemplateKHR(handle, set, update_template, data); - } - - VkResult AcquireNextImageKHR(VkSwapchainKHR swapchain, u64 timeout, VkSemaphore semaphore, - VkFence fence, u32* image_index) const noexcept { - return dld->vkAcquireNextImageKHR(handle, swapchain, timeout, semaphore, fence, - image_index); - } - - VkResult WaitIdle() const noexcept { - return dld->vkDeviceWaitIdle(handle); - } - - void ResetQueryPoolEXT(VkQueryPool query_pool, u32 first, u32 count) const noexcept { - dld->vkResetQueryPoolEXT(handle, query_pool, first, count); - } - - VkResult GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size, - void* data, VkDeviceSize stride, - VkQueryResultFlags flags) const noexcept { - return dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride, - flags); - } -}; - -class PhysicalDevice { -public: - constexpr PhysicalDevice() noexcept = default; - - constexpr PhysicalDevice(VkPhysicalDevice 
physical_device_, - const InstanceDispatch& dld_) noexcept - : physical_device{physical_device_}, dld{&dld_} {} - - constexpr operator VkPhysicalDevice() const noexcept { - return physical_device; - } - - VkPhysicalDeviceProperties GetProperties() const noexcept; - - void GetProperties2KHR(VkPhysicalDeviceProperties2KHR&) const noexcept; - - VkPhysicalDeviceFeatures GetFeatures() const noexcept; - - void GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR&) const noexcept; - - VkFormatProperties GetFormatProperties(VkFormat) const noexcept; - - std::vector EnumerateDeviceExtensionProperties() const; - - std::vector GetQueueFamilyProperties() const; - - bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const; - - VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const; - - std::vector GetSurfaceFormatsKHR(VkSurfaceKHR) const; - - std::vector GetSurfacePresentModesKHR(VkSurfaceKHR) const; - - VkPhysicalDeviceMemoryProperties GetMemoryProperties() const noexcept; - -private: - VkPhysicalDevice physical_device = nullptr; - const InstanceDispatch* dld = nullptr; -}; - -class CommandBuffer { -public: - CommandBuffer() noexcept = default; - - explicit CommandBuffer(VkCommandBuffer handle_, const DeviceDispatch& dld_) noexcept - : handle{handle_}, dld{&dld_} {} - - const VkCommandBuffer* address() const noexcept { - return &handle; - } - - void Begin(const VkCommandBufferBeginInfo& begin_info) const { - Check(dld->vkBeginCommandBuffer(handle, &begin_info)); - } - - void End() const { - Check(dld->vkEndCommandBuffer(handle)); - } - - void BeginRenderPass(const VkRenderPassBeginInfo& renderpass_bi, - VkSubpassContents contents) const noexcept { - dld->vkCmdBeginRenderPass(handle, &renderpass_bi, contents); - } - - void EndRenderPass() const noexcept { - dld->vkCmdEndRenderPass(handle); - } - - void BeginQuery(VkQueryPool query_pool, u32 query, VkQueryControlFlags flags) const noexcept { - dld->vkCmdBeginQuery(handle, query_pool, query, flags); - } - - 
void EndQuery(VkQueryPool query_pool, u32 query) const noexcept { - dld->vkCmdEndQuery(handle, query_pool, query); - } - - void BindDescriptorSets(VkPipelineBindPoint bind_point, VkPipelineLayout layout, u32 first, - Span sets, Span dynamic_offsets) const noexcept { - dld->vkCmdBindDescriptorSets(handle, bind_point, layout, first, sets.size(), sets.data(), - dynamic_offsets.size(), dynamic_offsets.data()); - } - - void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { - dld->vkCmdBindPipeline(handle, bind_point, pipeline); - } - - void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, - VkIndexType index_type) const noexcept { - dld->vkCmdBindIndexBuffer(handle, buffer, offset, index_type); - } - - void BindVertexBuffers(u32 first, u32 count, const VkBuffer* buffers, - const VkDeviceSize* offsets) const noexcept { - dld->vkCmdBindVertexBuffers(handle, first, count, buffers, offsets); - } - - void BindVertexBuffer(u32 binding, VkBuffer buffer, VkDeviceSize offset) const noexcept { - BindVertexBuffers(binding, 1, &buffer, &offset); - } - - void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex, - u32 first_instance) const noexcept { - dld->vkCmdDraw(handle, vertex_count, instance_count, first_vertex, first_instance); - } - - void DrawIndexed(u32 index_count, u32 instance_count, u32 first_index, u32 vertex_offset, - u32 first_instance) const noexcept { - dld->vkCmdDrawIndexed(handle, index_count, instance_count, first_index, vertex_offset, - first_instance); - } - - void ClearAttachments(Span attachments, - Span rects) const noexcept { - dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(), - rects.data()); - } - - void BlitImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, - VkImageLayout dst_layout, Span regions, - VkFilter filter) const noexcept { - dld->vkCmdBlitImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), - regions.data(), filter); - 
} - - void ResolveImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, - VkImageLayout dst_layout, Span regions) { - dld->vkCmdResolveImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), - regions.data()); - } - - void Dispatch(u32 x, u32 y, u32 z) const noexcept { - dld->vkCmdDispatch(handle, x, y, z); - } - - void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, - VkDependencyFlags dependency_flags, Span memory_barriers, - Span buffer_barriers, - Span image_barriers) const noexcept { - dld->vkCmdPipelineBarrier(handle, src_stage_mask, dst_stage_mask, dependency_flags, - memory_barriers.size(), memory_barriers.data(), - buffer_barriers.size(), buffer_barriers.data(), - image_barriers.size(), image_barriers.data()); - } - - void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, - VkDependencyFlags dependency_flags = 0) const noexcept { - PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, {}); - } - - void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, - VkDependencyFlags dependency_flags, - const VkBufferMemoryBarrier& buffer_barrier) const noexcept { - PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {}); - } - - void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, - VkDependencyFlags dependency_flags, - const VkImageMemoryBarrier& image_barrier) const noexcept { - PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, image_barrier); - } - - void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout, - Span regions) const noexcept { - dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), - regions.data()); - } - - void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, - Span regions) const noexcept { - 
dld->vkCmdCopyBuffer(handle, src_buffer, dst_buffer, regions.size(), regions.data()); - } - - void CopyImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, - VkImageLayout dst_layout, Span regions) const noexcept { - dld->vkCmdCopyImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), - regions.data()); - } - - void CopyImageToBuffer(VkImage src_image, VkImageLayout src_layout, VkBuffer dst_buffer, - Span regions) const noexcept { - dld->vkCmdCopyImageToBuffer(handle, src_image, src_layout, dst_buffer, regions.size(), - regions.data()); - } - - void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, - u32 data) const noexcept { - dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data); - } - - void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags, u32 offset, u32 size, - const void* values) const noexcept { - dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); - } - - template - void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags, - const T& data) const noexcept { - static_assert(std::is_trivially_copyable_v, " is not trivially copyable"); - dld->vkCmdPushConstants(handle, layout, flags, 0, static_cast(sizeof(T)), &data); - } - - void SetViewport(u32 first, Span viewports) const noexcept { - dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); - } - - void SetScissor(u32 first, Span scissors) const noexcept { - dld->vkCmdSetScissor(handle, first, scissors.size(), scissors.data()); - } - - void SetBlendConstants(const float blend_constants[4]) const noexcept { - dld->vkCmdSetBlendConstants(handle, blend_constants); - } - - void SetStencilCompareMask(VkStencilFaceFlags face_mask, u32 compare_mask) const noexcept { - dld->vkCmdSetStencilCompareMask(handle, face_mask, compare_mask); - } - - void SetStencilReference(VkStencilFaceFlags face_mask, u32 reference) const noexcept { - dld->vkCmdSetStencilReference(handle, face_mask, 
reference); - } - - void SetStencilWriteMask(VkStencilFaceFlags face_mask, u32 write_mask) const noexcept { - dld->vkCmdSetStencilWriteMask(handle, face_mask, write_mask); - } - - void SetDepthBias(float constant_factor, float clamp, float slope_factor) const noexcept { - dld->vkCmdSetDepthBias(handle, constant_factor, clamp, slope_factor); - } - - void SetDepthBounds(float min_depth_bounds, float max_depth_bounds) const noexcept { - dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds); - } - - void SetEvent(VkEvent event, VkPipelineStageFlags stage_flags) const noexcept { - dld->vkCmdSetEvent(handle, event, stage_flags); - } - - void WaitEvents(Span events, VkPipelineStageFlags src_stage_mask, - VkPipelineStageFlags dst_stage_mask, Span memory_barriers, - Span buffer_barriers, - Span image_barriers) const noexcept { - dld->vkCmdWaitEvents(handle, events.size(), events.data(), src_stage_mask, dst_stage_mask, - memory_barriers.size(), memory_barriers.data(), buffer_barriers.size(), - buffer_barriers.data(), image_barriers.size(), image_barriers.data()); - } - - void BindVertexBuffers2EXT(u32 first_binding, u32 binding_count, const VkBuffer* buffers, - const VkDeviceSize* offsets, const VkDeviceSize* sizes, - const VkDeviceSize* strides) const noexcept { - dld->vkCmdBindVertexBuffers2EXT(handle, first_binding, binding_count, buffers, offsets, - sizes, strides); - } - - void SetCullModeEXT(VkCullModeFlags cull_mode) const noexcept { - dld->vkCmdSetCullModeEXT(handle, cull_mode); - } - - void SetDepthBoundsTestEnableEXT(bool enable) const noexcept { - dld->vkCmdSetDepthBoundsTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); - } - - void SetDepthCompareOpEXT(VkCompareOp compare_op) const noexcept { - dld->vkCmdSetDepthCompareOpEXT(handle, compare_op); - } - - void SetDepthTestEnableEXT(bool enable) const noexcept { - dld->vkCmdSetDepthTestEnableEXT(handle, enable ? 
VK_TRUE : VK_FALSE); - } - - void SetDepthWriteEnableEXT(bool enable) const noexcept { - dld->vkCmdSetDepthWriteEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); - } - - void SetFrontFaceEXT(VkFrontFace front_face) const noexcept { - dld->vkCmdSetFrontFaceEXT(handle, front_face); - } - - void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { - dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); - } - - void SetStencilOpEXT(VkStencilFaceFlags face_mask, VkStencilOp fail_op, VkStencilOp pass_op, - VkStencilOp depth_fail_op, VkCompareOp compare_op) const noexcept { - dld->vkCmdSetStencilOpEXT(handle, face_mask, fail_op, pass_op, depth_fail_op, compare_op); - } - - void SetStencilTestEnableEXT(bool enable) const noexcept { - dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); - } - - void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, - const VkDeviceSize* offsets, - const VkDeviceSize* sizes) const noexcept { - dld->vkCmdBindTransformFeedbackBuffersEXT(handle, first, count, buffers, offsets, sizes); - } - - void BeginTransformFeedbackEXT(u32 first_counter_buffer, u32 counter_buffers_count, - const VkBuffer* counter_buffers, - const VkDeviceSize* counter_buffer_offsets) const noexcept { - dld->vkCmdBeginTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count, - counter_buffers, counter_buffer_offsets); - } - - void EndTransformFeedbackEXT(u32 first_counter_buffer, u32 counter_buffers_count, - const VkBuffer* counter_buffers, - const VkDeviceSize* counter_buffer_offsets) const noexcept { - dld->vkCmdEndTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count, - counter_buffers, counter_buffer_offsets); - } - - void BeginDebugUtilsLabelEXT(const char* label, std::span color) const noexcept { - const VkDebugUtilsLabelEXT label_info{ - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - .pNext = nullptr, - .pLabelName = label, - .color{color[0], 
color[1], color[2], color[3]}, - }; - dld->vkCmdBeginDebugUtilsLabelEXT(handle, &label_info); - } - - void EndDebugUtilsLabelEXT() const noexcept { - dld->vkCmdEndDebugUtilsLabelEXT(handle); - } - -private: - VkCommandBuffer handle; - const DeviceDispatch* dld; -}; - -u32 AvailableVersion(const InstanceDispatch& dld) noexcept; - -std::optional> EnumerateInstanceExtensionProperties( - const InstanceDispatch& dld); - -std::optional> EnumerateInstanceLayerProperties( - const InstanceDispatch& dld); - -} // namespace Vulkan::vk diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp new file mode 100644 index 000000000..478402bbd --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -0,0 +1,928 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include +#include +#include + +#include "common/common_types.h" +#include "common/logging/log.h" + +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan::vk { + +namespace { + +template +void SortPhysicalDevices(std::vector& devices, const InstanceDispatch& dld, + Func&& func) { + // Calling GetProperties calls Vulkan more than needed. But they are supposed to be cheap + // functions. 
+ std::stable_sort(devices.begin(), devices.end(), + [&dld, &func](VkPhysicalDevice lhs, VkPhysicalDevice rhs) { + return func(vk::PhysicalDevice(lhs, dld).GetProperties(), + vk::PhysicalDevice(rhs, dld).GetProperties()); + }); +} + +void SortPhysicalDevicesPerVendor(std::vector& devices, + const InstanceDispatch& dld, + std::initializer_list vendor_ids) { + for (auto it = vendor_ids.end(); it != vendor_ids.begin();) { + --it; + SortPhysicalDevices(devices, dld, [id = *it](const auto& lhs, const auto& rhs) { + return lhs.vendorID == id && rhs.vendorID != id; + }); + } +} + +void SortPhysicalDevices(std::vector& devices, const InstanceDispatch& dld) { + // Sort by name, this will set a base and make GPUs with higher numbers appear first + // (e.g. GTX 1650 will intentionally be listed before a GTX 1080). + SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) { + return std::string_view{lhs.deviceName} > std::string_view{rhs.deviceName}; + }); + // Prefer discrete over non-discrete + SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) { + return lhs.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU && + rhs.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU; + }); + // Prefer Nvidia over AMD, AMD over Intel, Intel over the rest. 
+ SortPhysicalDevicesPerVendor(devices, dld, {0x10DE, 0x1002, 0x8086}); +} + +template +bool Proc(T& result, const InstanceDispatch& dld, const char* proc_name, + VkInstance instance = nullptr) noexcept { + result = reinterpret_cast(dld.vkGetInstanceProcAddr(instance, proc_name)); + return result != nullptr; +} + +template +void Proc(T& result, const DeviceDispatch& dld, const char* proc_name, VkDevice device) noexcept { + result = reinterpret_cast(dld.vkGetDeviceProcAddr(device, proc_name)); +} + +void Load(VkDevice device, DeviceDispatch& dld) noexcept { +#define X(name) Proc(dld.name, dld, #name, device) + X(vkAcquireNextImageKHR); + X(vkAllocateCommandBuffers); + X(vkAllocateDescriptorSets); + X(vkAllocateMemory); + X(vkBeginCommandBuffer); + X(vkBindBufferMemory); + X(vkBindImageMemory); + X(vkCmdBeginQuery); + X(vkCmdBeginRenderPass); + X(vkCmdBeginTransformFeedbackEXT); + X(vkCmdBeginDebugUtilsLabelEXT); + X(vkCmdBindDescriptorSets); + X(vkCmdBindIndexBuffer); + X(vkCmdBindPipeline); + X(vkCmdBindTransformFeedbackBuffersEXT); + X(vkCmdBindVertexBuffers); + X(vkCmdBlitImage); + X(vkCmdClearAttachments); + X(vkCmdCopyBuffer); + X(vkCmdCopyBufferToImage); + X(vkCmdCopyImage); + X(vkCmdCopyImageToBuffer); + X(vkCmdDispatch); + X(vkCmdDraw); + X(vkCmdDrawIndexed); + X(vkCmdEndQuery); + X(vkCmdEndRenderPass); + X(vkCmdEndTransformFeedbackEXT); + X(vkCmdEndDebugUtilsLabelEXT); + X(vkCmdFillBuffer); + X(vkCmdPipelineBarrier); + X(vkCmdPushConstants); + X(vkCmdSetBlendConstants); + X(vkCmdSetDepthBias); + X(vkCmdSetDepthBounds); + X(vkCmdSetEvent); + X(vkCmdSetScissor); + X(vkCmdSetStencilCompareMask); + X(vkCmdSetStencilReference); + X(vkCmdSetStencilWriteMask); + X(vkCmdSetViewport); + X(vkCmdWaitEvents); + X(vkCmdBindVertexBuffers2EXT); + X(vkCmdSetCullModeEXT); + X(vkCmdSetDepthBoundsTestEnableEXT); + X(vkCmdSetDepthCompareOpEXT); + X(vkCmdSetDepthTestEnableEXT); + X(vkCmdSetDepthWriteEnableEXT); + X(vkCmdSetFrontFaceEXT); + X(vkCmdSetPrimitiveTopologyEXT); + 
X(vkCmdSetStencilOpEXT); + X(vkCmdSetStencilTestEnableEXT); + X(vkCmdResolveImage); + X(vkCreateBuffer); + X(vkCreateBufferView); + X(vkCreateCommandPool); + X(vkCreateComputePipelines); + X(vkCreateDescriptorPool); + X(vkCreateDescriptorSetLayout); + X(vkCreateDescriptorUpdateTemplateKHR); + X(vkCreateEvent); + X(vkCreateFence); + X(vkCreateFramebuffer); + X(vkCreateGraphicsPipelines); + X(vkCreateImage); + X(vkCreateImageView); + X(vkCreatePipelineLayout); + X(vkCreateQueryPool); + X(vkCreateRenderPass); + X(vkCreateSampler); + X(vkCreateSemaphore); + X(vkCreateShaderModule); + X(vkCreateSwapchainKHR); + X(vkDestroyBuffer); + X(vkDestroyBufferView); + X(vkDestroyCommandPool); + X(vkDestroyDescriptorPool); + X(vkDestroyDescriptorSetLayout); + X(vkDestroyDescriptorUpdateTemplateKHR); + X(vkDestroyEvent); + X(vkDestroyFence); + X(vkDestroyFramebuffer); + X(vkDestroyImage); + X(vkDestroyImageView); + X(vkDestroyPipeline); + X(vkDestroyPipelineLayout); + X(vkDestroyQueryPool); + X(vkDestroyRenderPass); + X(vkDestroySampler); + X(vkDestroySemaphore); + X(vkDestroyShaderModule); + X(vkDestroySwapchainKHR); + X(vkDeviceWaitIdle); + X(vkEndCommandBuffer); + X(vkFreeCommandBuffers); + X(vkFreeDescriptorSets); + X(vkFreeMemory); + X(vkGetBufferMemoryRequirements); + X(vkGetDeviceQueue); + X(vkGetEventStatus); + X(vkGetFenceStatus); + X(vkGetImageMemoryRequirements); + X(vkGetQueryPoolResults); + X(vkGetSemaphoreCounterValueKHR); + X(vkMapMemory); + X(vkQueueSubmit); + X(vkResetFences); + X(vkResetQueryPoolEXT); + X(vkSetDebugUtilsObjectNameEXT); + X(vkSetDebugUtilsObjectTagEXT); + X(vkUnmapMemory); + X(vkUpdateDescriptorSetWithTemplateKHR); + X(vkUpdateDescriptorSets); + X(vkWaitForFences); + X(vkWaitSemaphoresKHR); +#undef X +} + +template +void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjectType type, + const char* name) { + const VkDebugUtilsObjectNameInfoEXT name_info{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, + .pNext = 
nullptr, + .objectType = VK_OBJECT_TYPE_IMAGE, + .objectHandle = reinterpret_cast(handle), + .pObjectName = name, + }; + Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info)); +} + +} // Anonymous namespace + +bool Load(InstanceDispatch& dld) noexcept { +#define X(name) Proc(dld.name, dld, #name) + return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) && + X(vkEnumerateInstanceLayerProperties); +#undef X +} + +bool Load(VkInstance instance, InstanceDispatch& dld) noexcept { +#define X(name) Proc(dld.name, dld, #name, instance) + // These functions may fail to load depending on the enabled extensions. + // Don't return a failure on these. + X(vkCreateDebugUtilsMessengerEXT); + X(vkDestroyDebugUtilsMessengerEXT); + X(vkDestroySurfaceKHR); + X(vkGetPhysicalDeviceFeatures2KHR); + X(vkGetPhysicalDeviceProperties2KHR); + X(vkGetPhysicalDeviceSurfaceCapabilitiesKHR); + X(vkGetPhysicalDeviceSurfaceFormatsKHR); + X(vkGetPhysicalDeviceSurfacePresentModesKHR); + X(vkGetPhysicalDeviceSurfaceSupportKHR); + X(vkGetSwapchainImagesKHR); + X(vkQueuePresentKHR); + + return X(vkCreateDevice) && X(vkDestroyDevice) && X(vkDestroyDevice) && + X(vkEnumerateDeviceExtensionProperties) && X(vkEnumeratePhysicalDevices) && + X(vkGetDeviceProcAddr) && X(vkGetPhysicalDeviceFormatProperties) && + X(vkGetPhysicalDeviceMemoryProperties) && X(vkGetPhysicalDeviceProperties) && + X(vkGetPhysicalDeviceQueueFamilyProperties); +#undef X +} + +const char* Exception::what() const noexcept { + return ToString(result); +} + +const char* ToString(VkResult result) noexcept { + switch (result) { + case VkResult::VK_SUCCESS: + return "VK_SUCCESS"; + case VkResult::VK_NOT_READY: + return "VK_NOT_READY"; + case VkResult::VK_TIMEOUT: + return "VK_TIMEOUT"; + case VkResult::VK_EVENT_SET: + return "VK_EVENT_SET"; + case VkResult::VK_EVENT_RESET: + return "VK_EVENT_RESET"; + case VkResult::VK_INCOMPLETE: + return "VK_INCOMPLETE"; + case VkResult::VK_ERROR_OUT_OF_HOST_MEMORY: + return 
"VK_ERROR_OUT_OF_HOST_MEMORY"; + case VkResult::VK_ERROR_OUT_OF_DEVICE_MEMORY: + return "VK_ERROR_OUT_OF_DEVICE_MEMORY"; + case VkResult::VK_ERROR_INITIALIZATION_FAILED: + return "VK_ERROR_INITIALIZATION_FAILED"; + case VkResult::VK_ERROR_DEVICE_LOST: + return "VK_ERROR_DEVICE_LOST"; + case VkResult::VK_ERROR_MEMORY_MAP_FAILED: + return "VK_ERROR_MEMORY_MAP_FAILED"; + case VkResult::VK_ERROR_LAYER_NOT_PRESENT: + return "VK_ERROR_LAYER_NOT_PRESENT"; + case VkResult::VK_ERROR_EXTENSION_NOT_PRESENT: + return "VK_ERROR_EXTENSION_NOT_PRESENT"; + case VkResult::VK_ERROR_FEATURE_NOT_PRESENT: + return "VK_ERROR_FEATURE_NOT_PRESENT"; + case VkResult::VK_ERROR_INCOMPATIBLE_DRIVER: + return "VK_ERROR_INCOMPATIBLE_DRIVER"; + case VkResult::VK_ERROR_TOO_MANY_OBJECTS: + return "VK_ERROR_TOO_MANY_OBJECTS"; + case VkResult::VK_ERROR_FORMAT_NOT_SUPPORTED: + return "VK_ERROR_FORMAT_NOT_SUPPORTED"; + case VkResult::VK_ERROR_FRAGMENTED_POOL: + return "VK_ERROR_FRAGMENTED_POOL"; + case VkResult::VK_ERROR_OUT_OF_POOL_MEMORY: + return "VK_ERROR_OUT_OF_POOL_MEMORY"; + case VkResult::VK_ERROR_INVALID_EXTERNAL_HANDLE: + return "VK_ERROR_INVALID_EXTERNAL_HANDLE"; + case VkResult::VK_ERROR_SURFACE_LOST_KHR: + return "VK_ERROR_SURFACE_LOST_KHR"; + case VkResult::VK_ERROR_NATIVE_WINDOW_IN_USE_KHR: + return "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR"; + case VkResult::VK_SUBOPTIMAL_KHR: + return "VK_SUBOPTIMAL_KHR"; + case VkResult::VK_ERROR_OUT_OF_DATE_KHR: + return "VK_ERROR_OUT_OF_DATE_KHR"; + case VkResult::VK_ERROR_INCOMPATIBLE_DISPLAY_KHR: + return "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR"; + case VkResult::VK_ERROR_VALIDATION_FAILED_EXT: + return "VK_ERROR_VALIDATION_FAILED_EXT"; + case VkResult::VK_ERROR_INVALID_SHADER_NV: + return "VK_ERROR_INVALID_SHADER_NV"; + case VkResult::VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT: + return "VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT"; + case VkResult::VK_ERROR_FRAGMENTATION_EXT: + return "VK_ERROR_FRAGMENTATION_EXT"; + case 
VkResult::VK_ERROR_NOT_PERMITTED_EXT: + return "VK_ERROR_NOT_PERMITTED_EXT"; + case VkResult::VK_ERROR_INVALID_DEVICE_ADDRESS_EXT: + return "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT"; + case VkResult::VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT: + return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; + case VkResult::VK_ERROR_UNKNOWN: + return "VK_ERROR_UNKNOWN"; + case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: + return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; + case VkResult::VK_THREAD_IDLE_KHR: + return "VK_THREAD_IDLE_KHR"; + case VkResult::VK_THREAD_DONE_KHR: + return "VK_THREAD_DONE_KHR"; + case VkResult::VK_OPERATION_DEFERRED_KHR: + return "VK_OPERATION_DEFERRED_KHR"; + case VkResult::VK_OPERATION_NOT_DEFERRED_KHR: + return "VK_OPERATION_NOT_DEFERRED_KHR"; + case VkResult::VK_PIPELINE_COMPILE_REQUIRED_EXT: + return "VK_PIPELINE_COMPILE_REQUIRED_EXT"; + case VkResult::VK_RESULT_MAX_ENUM: + return "VK_RESULT_MAX_ENUM"; + } + return "Unknown"; +} + +void Destroy(VkInstance instance, const InstanceDispatch& dld) noexcept { + dld.vkDestroyInstance(instance, nullptr); +} + +void Destroy(VkDevice device, const InstanceDispatch& dld) noexcept { + dld.vkDestroyDevice(device, nullptr); +} + +void Destroy(VkDevice device, VkBuffer handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyBuffer(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkBufferView handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyBufferView(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkCommandPool handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyCommandPool(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkDescriptorPool handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyDescriptorPool(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkDescriptorSetLayout handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyDescriptorSetLayout(device, handle, nullptr); +} + +void Destroy(VkDevice device, 
VkDescriptorUpdateTemplateKHR handle, + const DeviceDispatch& dld) noexcept { + dld.vkDestroyDescriptorUpdateTemplateKHR(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld) noexcept { + dld.vkFreeMemory(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyEvent(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyFence(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkFramebuffer handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyFramebuffer(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkImage handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyImage(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkImageView handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyImageView(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkPipeline handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyPipeline(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkPipelineLayout handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyPipelineLayout(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkQueryPool handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyQueryPool(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkRenderPass handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyRenderPass(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkSampler handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroySampler(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkSwapchainKHR handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroySwapchainKHR(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkSemaphore handle, const DeviceDispatch& dld) noexcept { + 
dld.vkDestroySemaphore(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkShaderModule handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyShaderModule(device, handle, nullptr); +} + +void Destroy(VkInstance instance, VkDebugUtilsMessengerEXT handle, + const InstanceDispatch& dld) noexcept { + dld.vkDestroyDebugUtilsMessengerEXT(instance, handle, nullptr); +} + +void Destroy(VkInstance instance, VkSurfaceKHR handle, const InstanceDispatch& dld) noexcept { + dld.vkDestroySurfaceKHR(instance, handle, nullptr); +} + +VkResult Free(VkDevice device, VkDescriptorPool handle, Span sets, + const DeviceDispatch& dld) noexcept { + return dld.vkFreeDescriptorSets(device, handle, sets.size(), sets.data()); +} + +VkResult Free(VkDevice device, VkCommandPool handle, Span buffers, + const DeviceDispatch& dld) noexcept { + dld.vkFreeCommandBuffers(device, handle, buffers.size(), buffers.data()); + return VK_SUCCESS; +} + +Instance Instance::Create(u32 version, Span layers, Span extensions, + InstanceDispatch& dispatch) noexcept { + const VkApplicationInfo application_info{ + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pNext = nullptr, + .pApplicationName = "yuzu Emulator", + .applicationVersion = VK_MAKE_VERSION(0, 1, 0), + .pEngineName = "yuzu Emulator", + .engineVersion = VK_MAKE_VERSION(0, 1, 0), + .apiVersion = version, + }; + const VkInstanceCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .pApplicationInfo = &application_info, + .enabledLayerCount = layers.size(), + .ppEnabledLayerNames = layers.data(), + .enabledExtensionCount = extensions.size(), + .ppEnabledExtensionNames = extensions.data(), + }; + + VkInstance instance; + if (dispatch.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { + // Failed to create the instance. 
+ return {}; + } + if (!Proc(dispatch.vkDestroyInstance, dispatch, "vkDestroyInstance", instance)) { + // We successfully created an instance but the destroy function couldn't be loaded. + // This is a good moment to panic. + return {}; + } + + return Instance(instance, dispatch); +} + +std::optional> Instance::EnumeratePhysicalDevices() { + u32 num; + if (dld->vkEnumeratePhysicalDevices(handle, &num, nullptr) != VK_SUCCESS) { + return std::nullopt; + } + std::vector physical_devices(num); + if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) { + return std::nullopt; + } + SortPhysicalDevices(physical_devices, *dld); + return std::make_optional(std::move(physical_devices)); +} + +DebugCallback Instance::TryCreateDebugCallback( + PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept { + const VkDebugUtilsMessengerCreateInfoEXT ci{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT, + .pfnUserCallback = callback, + .pUserData = nullptr, + }; + + VkDebugUtilsMessengerEXT messenger; + if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) { + return {}; + } + return DebugCallback(messenger, handle, *dld); +} + +void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { + Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); +} + +void Buffer::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name); +} + +void BufferView::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name); +} + +void 
Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { + Check(dld->vkBindImageMemory(owner, handle, memory, offset)); +} + +void Image::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name); +} + +void ImageView::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name); +} + +void DeviceMemory::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name); +} + +void Fence::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FENCE, name); +} + +void Framebuffer::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FRAMEBUFFER, name); +} + +DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const { + const std::size_t num = ai.descriptorSetCount; + std::unique_ptr sets = std::make_unique(num); + switch (const VkResult result = dld->vkAllocateDescriptorSets(owner, &ai, sets.get())) { + case VK_SUCCESS: + return DescriptorSets(std::move(sets), num, owner, handle, *dld); + case VK_ERROR_OUT_OF_POOL_MEMORY: + return {}; + default: + throw Exception(result); + } +} + +void DescriptorPool::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DESCRIPTOR_POOL, name); +} + +CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { + const VkCommandBufferAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .pNext = nullptr, + .commandPool = handle, + .level = level, + .commandBufferCount = static_cast(num_buffers), + }; + + std::unique_ptr buffers = std::make_unique(num_buffers); + switch (const VkResult result = dld->vkAllocateCommandBuffers(owner, &ai, buffers.get())) { + case VK_SUCCESS: + return CommandBuffers(std::move(buffers), num_buffers, owner, handle, *dld); + case 
VK_ERROR_OUT_OF_POOL_MEMORY: + return {}; + default: + throw Exception(result); + } +} + +void CommandPool::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_COMMAND_POOL, name); +} + +std::vector SwapchainKHR::GetImages() const { + u32 num; + Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); + std::vector images(num); + Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, images.data())); + return images; +} + +void Event::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_EVENT, name); +} + +void ShaderModule::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name); +} + +void Semaphore::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name); +} + +Device Device::Create(VkPhysicalDevice physical_device, Span queues_ci, + Span enabled_extensions, const void* next, + DeviceDispatch& dispatch) noexcept { + const VkDeviceCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .pNext = next, + .flags = 0, + .queueCreateInfoCount = queues_ci.size(), + .pQueueCreateInfos = queues_ci.data(), + .enabledLayerCount = 0, + .ppEnabledLayerNames = nullptr, + .enabledExtensionCount = enabled_extensions.size(), + .ppEnabledExtensionNames = enabled_extensions.data(), + .pEnabledFeatures = nullptr, + }; + + VkDevice device; + if (dispatch.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { + return {}; + } + Load(device, dispatch); + return Device(device, dispatch); +} + +Queue Device::GetQueue(u32 family_index) const noexcept { + VkQueue queue; + dld->vkGetDeviceQueue(handle, family_index, 0, &queue); + return Queue(queue, *dld); +} + +Buffer Device::CreateBuffer(const VkBufferCreateInfo& ci) const { + VkBuffer object; + Check(dld->vkCreateBuffer(handle, &ci, nullptr, &object)); + return Buffer(object, handle, *dld); +} + 
+BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const { + VkBufferView object; + Check(dld->vkCreateBufferView(handle, &ci, nullptr, &object)); + return BufferView(object, handle, *dld); +} + +Image Device::CreateImage(const VkImageCreateInfo& ci) const { + VkImage object; + Check(dld->vkCreateImage(handle, &ci, nullptr, &object)); + return Image(object, handle, *dld); +} + +ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const { + VkImageView object; + Check(dld->vkCreateImageView(handle, &ci, nullptr, &object)); + return ImageView(object, handle, *dld); +} + +Semaphore Device::CreateSemaphore() const { + static constexpr VkSemaphoreCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; + return CreateSemaphore(ci); +} + +Semaphore Device::CreateSemaphore(const VkSemaphoreCreateInfo& ci) const { + VkSemaphore object; + Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object)); + return Semaphore(object, handle, *dld); +} + +Fence Device::CreateFence(const VkFenceCreateInfo& ci) const { + VkFence object; + Check(dld->vkCreateFence(handle, &ci, nullptr, &object)); + return Fence(object, handle, *dld); +} + +DescriptorPool Device::CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const { + VkDescriptorPool object; + Check(dld->vkCreateDescriptorPool(handle, &ci, nullptr, &object)); + return DescriptorPool(object, handle, *dld); +} + +RenderPass Device::CreateRenderPass(const VkRenderPassCreateInfo& ci) const { + VkRenderPass object; + Check(dld->vkCreateRenderPass(handle, &ci, nullptr, &object)); + return RenderPass(object, handle, *dld); +} + +DescriptorSetLayout Device::CreateDescriptorSetLayout( + const VkDescriptorSetLayoutCreateInfo& ci) const { + VkDescriptorSetLayout object; + Check(dld->vkCreateDescriptorSetLayout(handle, &ci, nullptr, &object)); + return DescriptorSetLayout(object, handle, *dld); +} + +PipelineLayout Device::CreatePipelineLayout(const 
VkPipelineLayoutCreateInfo& ci) const { + VkPipelineLayout object; + Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object)); + return PipelineLayout(object, handle, *dld); +} + +Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const { + VkPipeline object; + Check(dld->vkCreateGraphicsPipelines(handle, nullptr, 1, &ci, nullptr, &object)); + return Pipeline(object, handle, *dld); +} + +Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const { + VkPipeline object; + Check(dld->vkCreateComputePipelines(handle, nullptr, 1, &ci, nullptr, &object)); + return Pipeline(object, handle, *dld); +} + +Sampler Device::CreateSampler(const VkSamplerCreateInfo& ci) const { + VkSampler object; + Check(dld->vkCreateSampler(handle, &ci, nullptr, &object)); + return Sampler(object, handle, *dld); +} + +Framebuffer Device::CreateFramebuffer(const VkFramebufferCreateInfo& ci) const { + VkFramebuffer object; + Check(dld->vkCreateFramebuffer(handle, &ci, nullptr, &object)); + return Framebuffer(object, handle, *dld); +} + +CommandPool Device::CreateCommandPool(const VkCommandPoolCreateInfo& ci) const { + VkCommandPool object; + Check(dld->vkCreateCommandPool(handle, &ci, nullptr, &object)); + return CommandPool(object, handle, *dld); +} + +DescriptorUpdateTemplateKHR Device::CreateDescriptorUpdateTemplateKHR( + const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const { + VkDescriptorUpdateTemplateKHR object; + Check(dld->vkCreateDescriptorUpdateTemplateKHR(handle, &ci, nullptr, &object)); + return DescriptorUpdateTemplateKHR(object, handle, *dld); +} + +QueryPool Device::CreateQueryPool(const VkQueryPoolCreateInfo& ci) const { + VkQueryPool object; + Check(dld->vkCreateQueryPool(handle, &ci, nullptr, &object)); + return QueryPool(object, handle, *dld); +} + +ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) const { + VkShaderModule object; + Check(dld->vkCreateShaderModule(handle, &ci, nullptr, 
&object)); + return ShaderModule(object, handle, *dld); +} + +Event Device::CreateEvent() const { + static constexpr VkEventCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; + + VkEvent object; + Check(dld->vkCreateEvent(handle, &ci, nullptr, &object)); + return Event(object, handle, *dld); +} + +SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const { + VkSwapchainKHR object; + Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object)); + return SwapchainKHR(object, handle, *dld); +} + +DeviceMemory Device::TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept { + VkDeviceMemory memory; + if (dld->vkAllocateMemory(handle, &ai, nullptr, &memory) != VK_SUCCESS) { + return {}; + } + return DeviceMemory(memory, handle, *dld); +} + +DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const { + VkDeviceMemory memory; + Check(dld->vkAllocateMemory(handle, &ai, nullptr, &memory)); + return DeviceMemory(memory, handle, *dld); +} + +VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept { + VkMemoryRequirements requirements; + dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements); + return requirements; +} + +VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { + VkMemoryRequirements requirements; + dld->vkGetImageMemoryRequirements(handle, image, &requirements); + return requirements; +} + +void Device::UpdateDescriptorSets(Span writes, + Span copies) const noexcept { + dld->vkUpdateDescriptorSets(handle, writes.size(), writes.data(), copies.size(), copies.data()); +} + +VkPhysicalDeviceProperties PhysicalDevice::GetProperties() const noexcept { + VkPhysicalDeviceProperties properties; + dld->vkGetPhysicalDeviceProperties(physical_device, &properties); + return properties; +} + +void PhysicalDevice::GetProperties2KHR(VkPhysicalDeviceProperties2KHR& properties) const noexcept { + 
dld->vkGetPhysicalDeviceProperties2KHR(physical_device, &properties); +} + +VkPhysicalDeviceFeatures PhysicalDevice::GetFeatures() const noexcept { + VkPhysicalDeviceFeatures2KHR features2; + features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; + features2.pNext = nullptr; + dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features2); + return features2.features; +} + +void PhysicalDevice::GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR& features) const noexcept { + dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features); +} + +VkFormatProperties PhysicalDevice::GetFormatProperties(VkFormat format) const noexcept { + VkFormatProperties properties; + dld->vkGetPhysicalDeviceFormatProperties(physical_device, format, &properties); + return properties; +} + +std::vector PhysicalDevice::EnumerateDeviceExtensionProperties() const { + u32 num; + dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, nullptr); + std::vector properties(num); + dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, properties.data()); + return properties; +} + +std::vector PhysicalDevice::GetQueueFamilyProperties() const { + u32 num; + dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &num, nullptr); + std::vector properties(num); + dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &num, properties.data()); + return properties; +} + +bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR surface) const { + VkBool32 supported; + Check(dld->vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, queue_family_index, surface, + &supported)); + return supported == VK_TRUE; +} + +VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const { + VkSurfaceCapabilitiesKHR capabilities; + Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities)); + return capabilities; +} + +std::vector 
PhysicalDevice::GetSurfaceFormatsKHR(VkSurfaceKHR surface) const { + u32 num; + Check(dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &num, nullptr)); + std::vector formats(num); + Check( + dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &num, formats.data())); + return formats; +} + +std::vector PhysicalDevice::GetSurfacePresentModesKHR( + VkSurfaceKHR surface) const { + u32 num; + Check(dld->vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &num, nullptr)); + std::vector modes(num); + Check(dld->vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &num, + modes.data())); + return modes; +} + +VkPhysicalDeviceMemoryProperties PhysicalDevice::GetMemoryProperties() const noexcept { + VkPhysicalDeviceMemoryProperties properties; + dld->vkGetPhysicalDeviceMemoryProperties(physical_device, &properties); + return properties; +} + +u32 AvailableVersion(const InstanceDispatch& dld) noexcept { + PFN_vkEnumerateInstanceVersion vkEnumerateInstanceVersion; + if (!Proc(vkEnumerateInstanceVersion, dld, "vkEnumerateInstanceVersion")) { + // If the procedure is not found, Vulkan 1.0 is assumed + return VK_API_VERSION_1_0; + } + u32 version; + if (const VkResult result = vkEnumerateInstanceVersion(&version); result != VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "vkEnumerateInstanceVersion returned {}, assuming Vulkan 1.1", + ToString(result)); + return VK_API_VERSION_1_1; + } + return version; +} + +std::optional> EnumerateInstanceExtensionProperties( + const InstanceDispatch& dld) { + u32 num; + if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &num, nullptr) != VK_SUCCESS) { + return std::nullopt; + } + std::vector properties(num); + if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &num, properties.data()) != + VK_SUCCESS) { + return std::nullopt; + } + return properties; +} + +std::optional> EnumerateInstanceLayerProperties( + const InstanceDispatch& dld) { + u32 num; + if 
(dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) { + return std::nullopt; + } + std::vector properties(num); + if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) { + return std::nullopt; + } + return properties; +} + +} // namespace Vulkan::vk diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h new file mode 100644 index 000000000..f9a184e00 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -0,0 +1,1213 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define VK_NO_PROTOTYPES +#include + +#include "common/common_types.h" + +#ifdef _MSC_VER +#pragma warning(disable : 26812) // Disable prefer enum class over enum +#endif + +namespace Vulkan::vk { + +/** + * Span for Vulkan arrays. + * Based on std::span but optimized for array access instead of iterators. + * Size returns uint32_t instead of size_t to ease interaction with Vulkan functions. + */ +template +class Span { +public: + using value_type = T; + using size_type = u32; + using difference_type = std::ptrdiff_t; + using reference = const T&; + using const_reference = const T&; + using pointer = const T*; + using const_pointer = const T*; + using iterator = const T*; + using const_iterator = const T*; + + /// Construct an empty span. + constexpr Span() noexcept = default; + + /// Construct an empty span + constexpr Span(std::nullptr_t) noexcept {} + + /// Construct a span from a single element. + constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {} + + /// Construct a span from a range. 
+ template + // requires std::data(const Range&) + // requires std::size(const Range&) + constexpr Span(const Range& range) : ptr{std::data(range)}, num{std::size(range)} {} + + /// Construct a span from a pointer and a size. + /// This is inteded for subranges. + constexpr Span(const T* ptr_, std::size_t num_) noexcept : ptr{ptr_}, num{num_} {} + + /// Returns the data pointer by the span. + constexpr const T* data() const noexcept { + return ptr; + } + + /// Returns the number of elements in the span. + /// @note Returns a 32 bits integer because most Vulkan functions expect this type. + constexpr u32 size() const noexcept { + return static_cast(num); + } + + /// Returns true when the span is empty. + constexpr bool empty() const noexcept { + return num == 0; + } + + /// Returns a reference to the element in the passed index. + /// @pre: index < size() + constexpr const T& operator[](std::size_t index) const noexcept { + return ptr[index]; + } + + /// Returns an iterator to the beginning of the span. + constexpr const T* begin() const noexcept { + return ptr; + } + + /// Returns an iterator to the end of the span. + constexpr const T* end() const noexcept { + return ptr + num; + } + + /// Returns an iterator to the beginning of the span. + constexpr const T* cbegin() const noexcept { + return ptr; + } + + /// Returns an iterator to the end of the span. + constexpr const T* cend() const noexcept { + return ptr + num; + } + +private: + const T* ptr = nullptr; + std::size_t num = 0; +}; + +/// Vulkan exception generated from a VkResult. +class Exception final : public std::exception { +public: + /// Construct the exception with a result. 
+ /// @pre result != VK_SUCCESS + explicit Exception(VkResult result_) : result{result_} {} + virtual ~Exception() = default; + + const char* what() const noexcept override; + +private: + VkResult result; +}; + +/// Converts a VkResult enum into a rodata string +const char* ToString(VkResult) noexcept; + +/// Throws a Vulkan exception if result is not success. +inline void Check(VkResult result) { + if (result != VK_SUCCESS) { + throw Exception(result); + } +} + +/// Throws a Vulkan exception if result is an error. +/// @return result +inline VkResult Filter(VkResult result) { + if (result < 0) { + throw Exception(result); + } + return result; +} + +/// Table holding Vulkan instance function pointers. +struct InstanceDispatch { + PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; + + PFN_vkCreateInstance vkCreateInstance; + PFN_vkDestroyInstance vkDestroyInstance; + PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties; + PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties; + + PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT; + PFN_vkCreateDevice vkCreateDevice; + PFN_vkDestroyDebugUtilsMessengerEXT vkDestroyDebugUtilsMessengerEXT; + PFN_vkDestroyDevice vkDestroyDevice; + PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR; + PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties; + PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices; + PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr; + PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR; + PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties; + PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties; + PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties; + PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR; + PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties; + PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR 
vkGetPhysicalDeviceSurfaceCapabilitiesKHR; + PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR; + PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR; + PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR; + PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; + PFN_vkQueuePresentKHR vkQueuePresentKHR; +}; + +/// Table holding Vulkan device function pointers. +struct DeviceDispatch : public InstanceDispatch { + PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; + PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers; + PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets; + PFN_vkAllocateMemory vkAllocateMemory; + PFN_vkBeginCommandBuffer vkBeginCommandBuffer; + PFN_vkBindBufferMemory vkBindBufferMemory; + PFN_vkBindImageMemory vkBindImageMemory; + PFN_vkCmdBeginQuery vkCmdBeginQuery; + PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; + PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; + PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT; + PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; + PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; + PFN_vkCmdBindPipeline vkCmdBindPipeline; + PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT; + PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers; + PFN_vkCmdBlitImage vkCmdBlitImage; + PFN_vkCmdClearAttachments vkCmdClearAttachments; + PFN_vkCmdCopyBuffer vkCmdCopyBuffer; + PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage; + PFN_vkCmdCopyImage vkCmdCopyImage; + PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer; + PFN_vkCmdDispatch vkCmdDispatch; + PFN_vkCmdDraw vkCmdDraw; + PFN_vkCmdDrawIndexed vkCmdDrawIndexed; + PFN_vkCmdEndQuery vkCmdEndQuery; + PFN_vkCmdEndRenderPass vkCmdEndRenderPass; + PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT; + PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT; + PFN_vkCmdFillBuffer vkCmdFillBuffer; + PFN_vkCmdPipelineBarrier 
vkCmdPipelineBarrier; + PFN_vkCmdPushConstants vkCmdPushConstants; + PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants; + PFN_vkCmdSetDepthBias vkCmdSetDepthBias; + PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds; + PFN_vkCmdSetEvent vkCmdSetEvent; + PFN_vkCmdSetScissor vkCmdSetScissor; + PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask; + PFN_vkCmdSetStencilReference vkCmdSetStencilReference; + PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask; + PFN_vkCmdSetViewport vkCmdSetViewport; + PFN_vkCmdWaitEvents vkCmdWaitEvents; + PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT; + PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT; + PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT; + PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT; + PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT; + PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT; + PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT; + PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; + PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; + PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; + PFN_vkCmdResolveImage vkCmdResolveImage; + PFN_vkCreateBuffer vkCreateBuffer; + PFN_vkCreateBufferView vkCreateBufferView; + PFN_vkCreateCommandPool vkCreateCommandPool; + PFN_vkCreateComputePipelines vkCreateComputePipelines; + PFN_vkCreateDescriptorPool vkCreateDescriptorPool; + PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout; + PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; + PFN_vkCreateEvent vkCreateEvent; + PFN_vkCreateFence vkCreateFence; + PFN_vkCreateFramebuffer vkCreateFramebuffer; + PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines; + PFN_vkCreateImage vkCreateImage; + PFN_vkCreateImageView vkCreateImageView; + PFN_vkCreatePipelineLayout vkCreatePipelineLayout; + PFN_vkCreateQueryPool vkCreateQueryPool; + PFN_vkCreateRenderPass vkCreateRenderPass; + PFN_vkCreateSampler vkCreateSampler; + 
PFN_vkCreateSemaphore vkCreateSemaphore; + PFN_vkCreateShaderModule vkCreateShaderModule; + PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR; + PFN_vkDestroyBuffer vkDestroyBuffer; + PFN_vkDestroyBufferView vkDestroyBufferView; + PFN_vkDestroyCommandPool vkDestroyCommandPool; + PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool; + PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout; + PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; + PFN_vkDestroyEvent vkDestroyEvent; + PFN_vkDestroyFence vkDestroyFence; + PFN_vkDestroyFramebuffer vkDestroyFramebuffer; + PFN_vkDestroyImage vkDestroyImage; + PFN_vkDestroyImageView vkDestroyImageView; + PFN_vkDestroyPipeline vkDestroyPipeline; + PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout; + PFN_vkDestroyQueryPool vkDestroyQueryPool; + PFN_vkDestroyRenderPass vkDestroyRenderPass; + PFN_vkDestroySampler vkDestroySampler; + PFN_vkDestroySemaphore vkDestroySemaphore; + PFN_vkDestroyShaderModule vkDestroyShaderModule; + PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; + PFN_vkDeviceWaitIdle vkDeviceWaitIdle; + PFN_vkEndCommandBuffer vkEndCommandBuffer; + PFN_vkFreeCommandBuffers vkFreeCommandBuffers; + PFN_vkFreeDescriptorSets vkFreeDescriptorSets; + PFN_vkFreeMemory vkFreeMemory; + PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; + PFN_vkGetDeviceQueue vkGetDeviceQueue; + PFN_vkGetEventStatus vkGetEventStatus; + PFN_vkGetFenceStatus vkGetFenceStatus; + PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; + PFN_vkGetQueryPoolResults vkGetQueryPoolResults; + PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR; + PFN_vkMapMemory vkMapMemory; + PFN_vkQueueSubmit vkQueueSubmit; + PFN_vkResetFences vkResetFences; + PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; + PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT; + PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT; + PFN_vkUnmapMemory vkUnmapMemory; + 
PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; + PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; + PFN_vkWaitForFences vkWaitForFences; + PFN_vkWaitSemaphoresKHR vkWaitSemaphoresKHR; +}; + +/// Loads instance agnostic function pointers. +/// @return True on success, false on error. +bool Load(InstanceDispatch&) noexcept; + +/// Loads instance function pointers. +/// @return True on success, false on error. +bool Load(VkInstance, InstanceDispatch&) noexcept; + +void Destroy(VkInstance, const InstanceDispatch&) noexcept; +void Destroy(VkDevice, const InstanceDispatch&) noexcept; + +void Destroy(VkDevice, VkBuffer, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkBufferView, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkCommandPool, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkEvent, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkImageView, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkPipeline, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkPipelineLayout, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkQueryPool, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkRenderPass, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkSampler, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkSwapchainKHR, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkSemaphore, const DeviceDispatch&) noexcept; +void 
Destroy(VkDevice, VkShaderModule, const DeviceDispatch&) noexcept; +void Destroy(VkInstance, VkDebugUtilsMessengerEXT, const InstanceDispatch&) noexcept; +void Destroy(VkInstance, VkSurfaceKHR, const InstanceDispatch&) noexcept; + +VkResult Free(VkDevice, VkDescriptorPool, Span, const DeviceDispatch&) noexcept; +VkResult Free(VkDevice, VkCommandPool, Span, const DeviceDispatch&) noexcept; + +template +class Handle; + +/// Handle with an owning type. +/// Analogue to std::unique_ptr. +template +class Handle { +public: + /// Construct a handle and hold it's ownership. + explicit Handle(Type handle_, OwnerType owner_, const Dispatch& dld_) noexcept + : handle{handle_}, owner{owner_}, dld{&dld_} {} + + /// Construct an empty handle. + Handle() = default; + + /// Copying Vulkan objects is not supported and will never be. + Handle(const Handle&) = delete; + Handle& operator=(const Handle&) = delete; + + /// Construct a handle transfering the ownership from another handle. + Handle(Handle&& rhs) noexcept + : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, dld{rhs.dld} {} + + /// Assign the current handle transfering the ownership from another handle. + /// Destroys any previously held object. + Handle& operator=(Handle&& rhs) noexcept { + Release(); + handle = std::exchange(rhs.handle, nullptr); + owner = rhs.owner; + dld = rhs.dld; + return *this; + } + + /// Destroys the current handle if it existed. + ~Handle() noexcept { + Release(); + } + + /// Destroys any held object. + void reset() noexcept { + Release(); + handle = nullptr; + } + + /// Returns the address of the held object. + /// Intended for Vulkan structures that expect a pointer to an array. + const Type* address() const noexcept { + return std::addressof(handle); + } + + /// Returns the held Vulkan handle. + Type operator*() const noexcept { + return handle; + } + + /// Returns true when there's a held object. 
+ explicit operator bool() const noexcept { + return handle != nullptr; + } + +protected: + Type handle = nullptr; + OwnerType owner = nullptr; + const Dispatch* dld = nullptr; + +private: + /// Destroys the held object if it exists. + void Release() noexcept { + if (handle) { + Destroy(owner, handle, *dld); + } + } +}; + +/// Dummy type used to specify a handle has no owner. +struct NoOwner {}; + +/// Handle without an owning type. +/// Analogue to std::unique_ptr +template +class Handle { +public: + /// Construct a handle and hold it's ownership. + explicit Handle(Type handle_, const Dispatch& dld_) noexcept : handle{handle_}, dld{&dld_} {} + + /// Construct an empty handle. + Handle() noexcept = default; + + /// Copying Vulkan objects is not supported and will never be. + Handle(const Handle&) = delete; + Handle& operator=(const Handle&) = delete; + + /// Construct a handle transfering ownership from another handle. + Handle(Handle&& rhs) noexcept : handle{std::exchange(rhs.handle, nullptr)}, dld{rhs.dld} {} + + /// Assign the current handle transfering the ownership from another handle. + /// Destroys any previously held object. + Handle& operator=(Handle&& rhs) noexcept { + Release(); + handle = std::exchange(rhs.handle, nullptr); + dld = rhs.dld; + return *this; + } + + /// Destroys the current handle if it existed. + ~Handle() noexcept { + Release(); + } + + /// Destroys any held object. + void reset() noexcept { + Release(); + handle = nullptr; + } + + /// Returns the address of the held object. + /// Intended for Vulkan structures that expect a pointer to an array. + const Type* address() const noexcept { + return std::addressof(handle); + } + + /// Returns the held Vulkan handle. + Type operator*() const noexcept { + return handle; + } + + /// Returns true when there's a held object. 
+ operator bool() const noexcept { + return handle != nullptr; + } + +protected: + Type handle = nullptr; + const Dispatch* dld = nullptr; + +private: + /// Destroys the held object if it exists. + void Release() noexcept { + if (handle) { + Destroy(handle, *dld); + } + } +}; + +/// Array of a pool allocation. +/// Analogue to std::vector +template +class PoolAllocations { +public: + /// Construct an empty allocation. + PoolAllocations() = default; + + /// Construct an allocation. Errors are reported through IsOutOfPoolMemory(). + explicit PoolAllocations(std::unique_ptr allocations_, std::size_t num_, + VkDevice device_, PoolType pool_, const DeviceDispatch& dld_) noexcept + : allocations{std::move(allocations_)}, num{num_}, device{device_}, pool{pool_}, + dld{&dld_} {} + + /// Copying Vulkan allocations is not supported and will never be. + PoolAllocations(const PoolAllocations&) = delete; + PoolAllocations& operator=(const PoolAllocations&) = delete; + + /// Construct an allocation transfering ownership from another allocation. + PoolAllocations(PoolAllocations&& rhs) noexcept + : allocations{std::move(rhs.allocations)}, num{rhs.num}, device{rhs.device}, pool{rhs.pool}, + dld{rhs.dld} {} + + /// Assign an allocation transfering ownership from another allocation. + /// Releases any previously held allocation. + PoolAllocations& operator=(PoolAllocations&& rhs) noexcept { + Release(); + allocations = std::move(rhs.allocations); + num = rhs.num; + device = rhs.device; + pool = rhs.pool; + dld = rhs.dld; + return *this; + } + + /// Destroys any held allocation. + ~PoolAllocations() { + Release(); + } + + /// Returns the number of allocations. + std::size_t size() const noexcept { + return num; + } + + /// Returns a pointer to the array of allocations. + AllocationType const* data() const noexcept { + return allocations.get(); + } + + /// Returns the allocation in the specified index. 
+ /// @pre index < size() + AllocationType operator[](std::size_t index) const noexcept { + return allocations[index]; + } + + /// True when a pool fails to construct. + bool IsOutOfPoolMemory() const noexcept { + return !device; + } + +private: + /// Destroys the held allocations if they exist. + void Release() noexcept { + if (!allocations) { + return; + } + const Span span(allocations.get(), num); + const VkResult result = Free(device, pool, span, *dld); + // There's no way to report errors from a destructor. + if (result != VK_SUCCESS) { + std::terminate(); + } + } + + std::unique_ptr allocations; + std::size_t num = 0; + VkDevice device = nullptr; + PoolType pool = nullptr; + const DeviceDispatch* dld = nullptr; +}; + +using DebugCallback = Handle; +using DescriptorSetLayout = Handle; +using DescriptorUpdateTemplateKHR = Handle; +using Pipeline = Handle; +using PipelineLayout = Handle; +using QueryPool = Handle; +using RenderPass = Handle; +using Sampler = Handle; +using SurfaceKHR = Handle; + +using DescriptorSets = PoolAllocations; +using CommandBuffers = PoolAllocations; + +/// Vulkan instance owning handle. +class Instance : public Handle { + using Handle::Handle; + +public: + /// Creates a Vulkan instance. Use "operator bool" for error handling. + static Instance Create(u32 version, Span layers, Span extensions, + InstanceDispatch& dispatch) noexcept; + + /// Enumerates physical devices. + /// @return Physical devices and an empty handle on failure. + std::optional> EnumeratePhysicalDevices(); + + /// Tries to create a debug callback messenger. Returns an empty handle on failure. + DebugCallback TryCreateDebugCallback(PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept; +}; + +class Queue { +public: + /// Construct an empty queue handle. + constexpr Queue() noexcept = default; + + /// Construct a queue handle. 
+ constexpr Queue(VkQueue queue_, const DeviceDispatch& dld_) noexcept + : queue{queue_}, dld{&dld_} {} + + VkResult Submit(Span submit_infos, + VkFence fence = VK_NULL_HANDLE) const noexcept { + return dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence); + } + + VkResult Present(const VkPresentInfoKHR& present_info) const noexcept { + return dld->vkQueuePresentKHR(queue, &present_info); + } + +private: + VkQueue queue = nullptr; + const DeviceDispatch* dld = nullptr; +}; + +class Buffer : public Handle { + using Handle::Handle; + +public: + /// Attaches a memory allocation. + void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + +class BufferView : public Handle { + using Handle::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + +class Image : public Handle { + using Handle::Handle; + +public: + /// Attaches a memory allocation. + void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + +class ImageView : public Handle { + using Handle::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + +class DeviceMemory : public Handle { + using Handle::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; + + u8* Map(VkDeviceSize offset, VkDeviceSize size) const { + void* data; + Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); + return static_cast(data); + } + + void Unmap() const noexcept { + dld->vkUnmapMemory(owner, handle); + } +}; + +class Fence : public Handle { + using Handle::Handle; + +public: + /// Set object name. 
+ void SetObjectNameEXT(const char* name) const; + + VkResult Wait(u64 timeout = std::numeric_limits::max()) const noexcept { + return dld->vkWaitForFences(owner, 1, &handle, true, timeout); + } + + VkResult GetStatus() const noexcept { + return dld->vkGetFenceStatus(owner, handle); + } + + void Reset() const { + Check(dld->vkResetFences(owner, 1, &handle)); + } +}; + +class Framebuffer : public Handle { + using Handle::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + +class DescriptorPool : public Handle { + using Handle::Handle; + +public: + DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + +class CommandPool : public Handle { + using Handle::Handle; + +public: + CommandBuffers Allocate(std::size_t num_buffers, + VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + +class SwapchainKHR : public Handle { + using Handle::Handle; + +public: + std::vector GetImages() const; +}; + +class Event : public Handle { + using Handle::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; + + VkResult GetStatus() const noexcept { + return dld->vkGetEventStatus(owner, handle); + } +}; + +class ShaderModule : public Handle { + using Handle::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + +class Semaphore : public Handle { + using Handle::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; + + [[nodiscard]] u64 GetCounter() const { + u64 value; + Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); + return value; + } + + /** + * Waits for a timeline semaphore on the host. 
+ * + * @param value Value to wait + * @param timeout Time in nanoseconds to timeout + * @return True on successful wait, false on timeout + */ + bool Wait(u64 value, u64 timeout = std::numeric_limits::max()) const { + const VkSemaphoreWaitInfoKHR wait_info{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .semaphoreCount = 1, + .pSemaphores = &handle, + .pValues = &value, + }; + const VkResult result = dld->vkWaitSemaphoresKHR(owner, &wait_info, timeout); + switch (result) { + case VK_SUCCESS: + return true; + case VK_TIMEOUT: + return false; + default: + throw Exception(result); + } + } +}; + +class Device : public Handle { + using Handle::Handle; + +public: + static Device Create(VkPhysicalDevice physical_device, Span queues_ci, + Span enabled_extensions, const void* next, + DeviceDispatch& dispatch) noexcept; + + Queue GetQueue(u32 family_index) const noexcept; + + Buffer CreateBuffer(const VkBufferCreateInfo& ci) const; + + BufferView CreateBufferView(const VkBufferViewCreateInfo& ci) const; + + Image CreateImage(const VkImageCreateInfo& ci) const; + + ImageView CreateImageView(const VkImageViewCreateInfo& ci) const; + + Semaphore CreateSemaphore() const; + + Semaphore CreateSemaphore(const VkSemaphoreCreateInfo& ci) const; + + Fence CreateFence(const VkFenceCreateInfo& ci) const; + + DescriptorPool CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const; + + RenderPass CreateRenderPass(const VkRenderPassCreateInfo& ci) const; + + DescriptorSetLayout CreateDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo& ci) const; + + PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const; + + Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const; + + Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const; + + Sampler CreateSampler(const VkSamplerCreateInfo& ci) const; + + Framebuffer CreateFramebuffer(const VkFramebufferCreateInfo& ci) const; + + 
CommandPool CreateCommandPool(const VkCommandPoolCreateInfo& ci) const; + + DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplateKHR( + const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const; + + QueryPool CreateQueryPool(const VkQueryPoolCreateInfo& ci) const; + + ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; + + Event CreateEvent() const; + + SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; + + DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept; + + DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; + + VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept; + + VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; + + void UpdateDescriptorSets(Span writes, + Span copies) const noexcept; + + void UpdateDescriptorSet(VkDescriptorSet set, VkDescriptorUpdateTemplateKHR update_template, + const void* data) const noexcept { + dld->vkUpdateDescriptorSetWithTemplateKHR(handle, set, update_template, data); + } + + VkResult AcquireNextImageKHR(VkSwapchainKHR swapchain, u64 timeout, VkSemaphore semaphore, + VkFence fence, u32* image_index) const noexcept { + return dld->vkAcquireNextImageKHR(handle, swapchain, timeout, semaphore, fence, + image_index); + } + + VkResult WaitIdle() const noexcept { + return dld->vkDeviceWaitIdle(handle); + } + + void ResetQueryPoolEXT(VkQueryPool query_pool, u32 first, u32 count) const noexcept { + dld->vkResetQueryPoolEXT(handle, query_pool, first, count); + } + + VkResult GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size, + void* data, VkDeviceSize stride, + VkQueryResultFlags flags) const noexcept { + return dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride, + flags); + } +}; + +class PhysicalDevice { +public: + constexpr PhysicalDevice() noexcept = default; + + constexpr PhysicalDevice(VkPhysicalDevice 
physical_device_, + const InstanceDispatch& dld_) noexcept + : physical_device{physical_device_}, dld{&dld_} {} + + constexpr operator VkPhysicalDevice() const noexcept { + return physical_device; + } + + VkPhysicalDeviceProperties GetProperties() const noexcept; + + void GetProperties2KHR(VkPhysicalDeviceProperties2KHR&) const noexcept; + + VkPhysicalDeviceFeatures GetFeatures() const noexcept; + + void GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR&) const noexcept; + + VkFormatProperties GetFormatProperties(VkFormat) const noexcept; + + std::vector EnumerateDeviceExtensionProperties() const; + + std::vector GetQueueFamilyProperties() const; + + bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const; + + VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const; + + std::vector GetSurfaceFormatsKHR(VkSurfaceKHR) const; + + std::vector GetSurfacePresentModesKHR(VkSurfaceKHR) const; + + VkPhysicalDeviceMemoryProperties GetMemoryProperties() const noexcept; + +private: + VkPhysicalDevice physical_device = nullptr; + const InstanceDispatch* dld = nullptr; +}; + +class CommandBuffer { +public: + CommandBuffer() noexcept = default; + + explicit CommandBuffer(VkCommandBuffer handle_, const DeviceDispatch& dld_) noexcept + : handle{handle_}, dld{&dld_} {} + + const VkCommandBuffer* address() const noexcept { + return &handle; + } + + void Begin(const VkCommandBufferBeginInfo& begin_info) const { + Check(dld->vkBeginCommandBuffer(handle, &begin_info)); + } + + void End() const { + Check(dld->vkEndCommandBuffer(handle)); + } + + void BeginRenderPass(const VkRenderPassBeginInfo& renderpass_bi, + VkSubpassContents contents) const noexcept { + dld->vkCmdBeginRenderPass(handle, &renderpass_bi, contents); + } + + void EndRenderPass() const noexcept { + dld->vkCmdEndRenderPass(handle); + } + + void BeginQuery(VkQueryPool query_pool, u32 query, VkQueryControlFlags flags) const noexcept { + dld->vkCmdBeginQuery(handle, query_pool, query, flags); + } + + 
void EndQuery(VkQueryPool query_pool, u32 query) const noexcept { + dld->vkCmdEndQuery(handle, query_pool, query); + } + + void BindDescriptorSets(VkPipelineBindPoint bind_point, VkPipelineLayout layout, u32 first, + Span sets, Span dynamic_offsets) const noexcept { + dld->vkCmdBindDescriptorSets(handle, bind_point, layout, first, sets.size(), sets.data(), + dynamic_offsets.size(), dynamic_offsets.data()); + } + + void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { + dld->vkCmdBindPipeline(handle, bind_point, pipeline); + } + + void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, + VkIndexType index_type) const noexcept { + dld->vkCmdBindIndexBuffer(handle, buffer, offset, index_type); + } + + void BindVertexBuffers(u32 first, u32 count, const VkBuffer* buffers, + const VkDeviceSize* offsets) const noexcept { + dld->vkCmdBindVertexBuffers(handle, first, count, buffers, offsets); + } + + void BindVertexBuffer(u32 binding, VkBuffer buffer, VkDeviceSize offset) const noexcept { + BindVertexBuffers(binding, 1, &buffer, &offset); + } + + void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex, + u32 first_instance) const noexcept { + dld->vkCmdDraw(handle, vertex_count, instance_count, first_vertex, first_instance); + } + + void DrawIndexed(u32 index_count, u32 instance_count, u32 first_index, u32 vertex_offset, + u32 first_instance) const noexcept { + dld->vkCmdDrawIndexed(handle, index_count, instance_count, first_index, vertex_offset, + first_instance); + } + + void ClearAttachments(Span attachments, + Span rects) const noexcept { + dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(), + rects.data()); + } + + void BlitImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, + VkImageLayout dst_layout, Span regions, + VkFilter filter) const noexcept { + dld->vkCmdBlitImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), + regions.data(), filter); + 
} + + void ResolveImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, + VkImageLayout dst_layout, Span regions) const noexcept { + dld->vkCmdResolveImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), + regions.data()); + } + + void Dispatch(u32 x, u32 y, u32 z) const noexcept { + dld->vkCmdDispatch(handle, x, y, z); + } + + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, Span memory_barriers, + Span buffer_barriers, + Span image_barriers) const noexcept { + dld->vkCmdPipelineBarrier(handle, src_stage_mask, dst_stage_mask, dependency_flags, + memory_barriers.size(), memory_barriers.data(), + buffer_barriers.size(), buffer_barriers.data(), + image_barriers.size(), image_barriers.data()); + } + + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags = 0) const noexcept { + PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, {}); + } + + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, + const VkBufferMemoryBarrier& buffer_barrier) const noexcept { + PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {}); + } + + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, + const VkImageMemoryBarrier& image_barrier) const noexcept { + PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, image_barrier); + } + + void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout, + Span regions) const noexcept { + dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), + regions.data()); + } + + void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, + Span regions) const noexcept { + 
dld->vkCmdCopyBuffer(handle, src_buffer, dst_buffer, regions.size(), regions.data()); + } + + void CopyImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, + VkImageLayout dst_layout, Span regions) const noexcept { + dld->vkCmdCopyImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), + regions.data()); + } + + void CopyImageToBuffer(VkImage src_image, VkImageLayout src_layout, VkBuffer dst_buffer, + Span regions) const noexcept { + dld->vkCmdCopyImageToBuffer(handle, src_image, src_layout, dst_buffer, regions.size(), + regions.data()); + } + + void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, + u32 data) const noexcept { + dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data); + } + + void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags, u32 offset, u32 size, + const void* values) const noexcept { + dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); + } + + template + void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags, + const T& data) const noexcept { + static_assert(std::is_trivially_copyable_v, " is not trivially copyable"); + dld->vkCmdPushConstants(handle, layout, flags, 0, static_cast(sizeof(T)), &data); + } + + void SetViewport(u32 first, Span viewports) const noexcept { + dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); + } + + void SetScissor(u32 first, Span scissors) const noexcept { + dld->vkCmdSetScissor(handle, first, scissors.size(), scissors.data()); + } + + void SetBlendConstants(const float blend_constants[4]) const noexcept { + dld->vkCmdSetBlendConstants(handle, blend_constants); + } + + void SetStencilCompareMask(VkStencilFaceFlags face_mask, u32 compare_mask) const noexcept { + dld->vkCmdSetStencilCompareMask(handle, face_mask, compare_mask); + } + + void SetStencilReference(VkStencilFaceFlags face_mask, u32 reference) const noexcept { + dld->vkCmdSetStencilReference(handle, face_mask, 
reference); + } + + void SetStencilWriteMask(VkStencilFaceFlags face_mask, u32 write_mask) const noexcept { + dld->vkCmdSetStencilWriteMask(handle, face_mask, write_mask); + } + + void SetDepthBias(float constant_factor, float clamp, float slope_factor) const noexcept { + dld->vkCmdSetDepthBias(handle, constant_factor, clamp, slope_factor); + } + + void SetDepthBounds(float min_depth_bounds, float max_depth_bounds) const noexcept { + dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds); + } + + void SetEvent(VkEvent event, VkPipelineStageFlags stage_flags) const noexcept { + dld->vkCmdSetEvent(handle, event, stage_flags); + } + + void WaitEvents(Span events, VkPipelineStageFlags src_stage_mask, + VkPipelineStageFlags dst_stage_mask, Span memory_barriers, + Span buffer_barriers, + Span image_barriers) const noexcept { + dld->vkCmdWaitEvents(handle, events.size(), events.data(), src_stage_mask, dst_stage_mask, + memory_barriers.size(), memory_barriers.data(), buffer_barriers.size(), + buffer_barriers.data(), image_barriers.size(), image_barriers.data()); + } + + void BindVertexBuffers2EXT(u32 first_binding, u32 binding_count, const VkBuffer* buffers, + const VkDeviceSize* offsets, const VkDeviceSize* sizes, + const VkDeviceSize* strides) const noexcept { + dld->vkCmdBindVertexBuffers2EXT(handle, first_binding, binding_count, buffers, offsets, + sizes, strides); + } + + void SetCullModeEXT(VkCullModeFlags cull_mode) const noexcept { + dld->vkCmdSetCullModeEXT(handle, cull_mode); + } + + void SetDepthBoundsTestEnableEXT(bool enable) const noexcept { + dld->vkCmdSetDepthBoundsTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } + + void SetDepthCompareOpEXT(VkCompareOp compare_op) const noexcept { + dld->vkCmdSetDepthCompareOpEXT(handle, compare_op); + } + + void SetDepthTestEnableEXT(bool enable) const noexcept { + dld->vkCmdSetDepthTestEnableEXT(handle, enable ? 
VK_TRUE : VK_FALSE); + } + + void SetDepthWriteEnableEXT(bool enable) const noexcept { + dld->vkCmdSetDepthWriteEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } + + void SetFrontFaceEXT(VkFrontFace front_face) const noexcept { + dld->vkCmdSetFrontFaceEXT(handle, front_face); + } + + void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { + dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); + } + + void SetStencilOpEXT(VkStencilFaceFlags face_mask, VkStencilOp fail_op, VkStencilOp pass_op, + VkStencilOp depth_fail_op, VkCompareOp compare_op) const noexcept { + dld->vkCmdSetStencilOpEXT(handle, face_mask, fail_op, pass_op, depth_fail_op, compare_op); + } + + void SetStencilTestEnableEXT(bool enable) const noexcept { + dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } + + void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, + const VkDeviceSize* offsets, + const VkDeviceSize* sizes) const noexcept { + dld->vkCmdBindTransformFeedbackBuffersEXT(handle, first, count, buffers, offsets, sizes); + } + + void BeginTransformFeedbackEXT(u32 first_counter_buffer, u32 counter_buffers_count, + const VkBuffer* counter_buffers, + const VkDeviceSize* counter_buffer_offsets) const noexcept { + dld->vkCmdBeginTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count, + counter_buffers, counter_buffer_offsets); + } + + void EndTransformFeedbackEXT(u32 first_counter_buffer, u32 counter_buffers_count, + const VkBuffer* counter_buffers, + const VkDeviceSize* counter_buffer_offsets) const noexcept { + dld->vkCmdEndTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count, + counter_buffers, counter_buffer_offsets); + } + + void BeginDebugUtilsLabelEXT(const char* label, std::span color) const noexcept { + const VkDebugUtilsLabelEXT label_info{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = label, + .color{color[0], 
color[1], color[2], color[3]}, + }; + dld->vkCmdBeginDebugUtilsLabelEXT(handle, &label_info); + } + + void EndDebugUtilsLabelEXT() const noexcept { + dld->vkCmdEndDebugUtilsLabelEXT(handle); + } + +private: + VkCommandBuffer handle = nullptr; + const DeviceDispatch* dld = nullptr; +}; + +u32 AvailableVersion(const InstanceDispatch& dld) noexcept; + +std::optional> EnumerateInstanceExtensionProperties( + const InstanceDispatch& dld); + +std::optional> EnumerateInstanceLayerProperties( + const InstanceDispatch& dld); + +} // namespace Vulkan::vk -- cgit v1.2.3 From 25f88d99cead2f7f6fdbf5e36e7578472aaa65bd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Dec 2020 22:05:48 -0300 Subject: renderer_vulkan: Move instance initialization to a separate file Simplify Vulkan's backend initialization code by moving it to a separate file, allowing us to initialize a Vulkan instance from different backends. --- src/video_core/CMakeLists.txt | 2 + src/video_core/renderer_vulkan/renderer_vulkan.cpp | 112 +-------------- src/video_core/vulkan_common/vulkan_instance.cpp | 152 +++++++++++++++++++++ src/video_core/vulkan_common/vulkan_instance.h | 21 +++ 4 files changed, 176 insertions(+), 111 deletions(-) create mode 100644 src/video_core/vulkan_common/vulkan_instance.cpp create mode 100644 src/video_core/vulkan_common/vulkan_instance.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e19632bb1..f8c36947a 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -258,6 +258,8 @@ add_library(video_core STATIC textures/texture.h video_core.cpp video_core.h + vulkan_common/vulkan_instance.cpp + vulkan_common/vulkan_instance.h vulkan_common/vulkan_library.cpp vulkan_common/vulkan_library.h vulkan_common/vulkan_wrapper.cpp diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 6e267f89d..82619bc61 100644 --- 
a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -29,6 +29,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/vulkan_common/vulkan_instance.h" #include "video_core/vulkan_common/vulkan_library.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -46,11 +47,7 @@ #endif namespace Vulkan { - namespace { - -using Core::Frontend::WindowSystemType; - VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT type, const VkDebugUtilsMessengerCallbackDataEXT* data, @@ -69,109 +66,6 @@ VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, return VK_FALSE; } -std::pair CreateInstance( - Common::DynamicLibrary& library, vk::InstanceDispatch& dld, - WindowSystemType window_type = WindowSystemType::Headless, bool enable_debug_utils = false, - bool enable_layers = false) { - if (!library.IsOpen()) { - LOG_ERROR(Render_Vulkan, "Vulkan library not available"); - return {}; - } - if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) { - LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan"); - return {}; - } - if (!vk::Load(dld)) { - LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); - return {}; - } - - std::vector extensions; - extensions.reserve(6); - switch (window_type) { - case Core::Frontend::WindowSystemType::Headless: - break; -#ifdef _WIN32 - case Core::Frontend::WindowSystemType::Windows: - extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); - break; -#endif -#if !defined(_WIN32) && !defined(__APPLE__) - case Core::Frontend::WindowSystemType::X11: - extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); - break; - case Core::Frontend::WindowSystemType::Wayland: - extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); - break; 
-#endif - default: - LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); - break; - } - if (window_type != Core::Frontend::WindowSystemType::Headless) { - extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); - } - if (enable_debug_utils) { - extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); - } - extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); - - const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); - if (!properties) { - LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); - return {}; - } - - for (const char* extension : extensions) { - const auto it = - std::find_if(properties->begin(), properties->end(), [extension](const auto& prop) { - return !std::strcmp(extension, prop.extensionName); - }); - if (it == properties->end()) { - LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); - return {}; - } - } - - std::vector layers; - layers.reserve(1); - if (enable_layers) { - layers.push_back("VK_LAYER_KHRONOS_validation"); - } - - const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld); - if (!layer_properties) { - LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers"); - layers.clear(); - } - - for (auto layer_it = layers.begin(); layer_it != layers.end();) { - const char* const layer = *layer_it; - const auto it = std::find_if( - layer_properties->begin(), layer_properties->end(), - [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); }); - if (it == layer_properties->end()) { - LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer); - layer_it = layers.erase(layer_it); - } else { - ++layer_it; - } - } - - // Limit the maximum version of Vulkan to avoid using untested version. 
- const u32 version = std::min(vk::AvailableVersion(dld), static_cast(VK_API_VERSION_1_1)); - - vk::Instance instance = vk::Instance::Create(version, layers, extensions, dld); - if (!instance) { - LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); - return {}; - } - if (!vk::Load(*instance, dld)) { - LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); - } - return std::make_pair(std::move(instance), version); -} - std::string GetReadableVersion(u32 version) { return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), VK_VERSION_PATCH(version)); @@ -194,7 +88,6 @@ std::string GetDriverVersion(const VKDevice& device) { const u32 minor = version & 0x3fff; return fmt::format("{}.{}", major, minor); } - return GetReadableVersion(version); } @@ -233,7 +126,6 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!framebuffer) { return; } - const auto& layout = render_window.GetFramebufferLayout(); if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; @@ -429,12 +321,10 @@ std::vector RendererVulkan::EnumerateDevices() { if (!instance) { return {}; } - const std::optional physical_devices = instance.EnumeratePhysicalDevices(); if (!physical_devices) { return {}; } - std::vector names; names.reserve(physical_devices->size()); for (const auto& device : *physical_devices) { diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp new file mode 100644 index 000000000..c19f93e0a --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_instance.cpp @@ -0,0 +1,152 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include +#include + +#include "common/common_types.h" +#include "common/dynamic_library.h" +#include "common/logging/log.h" +#include "core/frontend/emu_window.h" +#include "video_core/vulkan_common/vulkan_instance.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +// Include these late to avoid polluting previous headers +#ifdef _WIN32 +#include +// ensure include order +#include +#endif + +#if !defined(_WIN32) && !defined(__APPLE__) +#include +#include +#include +#endif + +namespace Vulkan { +namespace { +[[nodiscard]] std::vector RequiredExtensions( + Core::Frontend::WindowSystemType window_type, bool enable_debug_utils) { + std::vector extensions; + extensions.reserve(6); + switch (window_type) { + case Core::Frontend::WindowSystemType::Headless: + break; +#ifdef _WIN32 + case Core::Frontend::WindowSystemType::Windows: + extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); + break; +#endif +#if !defined(_WIN32) && !defined(__APPLE__) + case Core::Frontend::WindowSystemType::X11: + extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); + break; + case Core::Frontend::WindowSystemType::Wayland: + extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); + break; +#endif + default: + LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); + break; + } + if (window_type != Core::Frontend::WindowSystemType::Headless) { + extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); + } + if (enable_debug_utils) { + extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } + extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + return extensions; +} + +[[nodiscard]] bool AreExtensionsSupported(const vk::InstanceDispatch& dld, + std::span extensions) { + const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); + if (!properties) { + LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); + return false; + } + for (const char* extension : 
extensions) { + const auto it = std::ranges::find_if(*properties, [extension](const auto& prop) { + return std::strcmp(extension, prop.extensionName) == 0; + }); + if (it == properties->end()) { + LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); + return false; + } + } + return true; +} + +[[nodiscard]] std::vector Layers(bool enable_layers) { + std::vector layers; + if (enable_layers) { + layers.push_back("VK_LAYER_KHRONOS_validation"); + } + return layers; +} + +void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector& layers) { + const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld); + if (!layer_properties) { + LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers"); + layers.clear(); + } + std::erase_if(layers, [&layer_properties](const char* layer) { + const auto comp = [layer](const VkLayerProperties& layer_property) { + return std::strcmp(layer, layer_property.layerName) == 0; + }; + const auto it = std::ranges::find_if(*layer_properties, comp); + if (it == layer_properties->end()) { + LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer); + return true; + } + return false; + }); +} +} // Anonymous namespace + +std::pair CreateInstance(Common::DynamicLibrary& library, + vk::InstanceDispatch& dld, + Core::Frontend::WindowSystemType window_type, + bool enable_debug_utils, bool enable_layers) { + if (!library.IsOpen()) { + LOG_ERROR(Render_Vulkan, "Vulkan library not available"); + return {}; + } + if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) { + LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan"); + return {}; + } + if (!vk::Load(dld)) { + LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); + return {}; + } + const std::vector extensions = RequiredExtensions(window_type, enable_debug_utils); + if (!AreExtensionsSupported(dld, extensions)) { + return {}; + } + + 
std::vector layers = Layers(enable_layers); + RemoveUnavailableLayers(dld, layers); + + // Limit the maximum version of Vulkan to avoid using untested version. + const u32 version = std::min(vk::AvailableVersion(dld), VK_API_VERSION_1_1); + + vk::Instance instance = vk::Instance::Create(version, layers, extensions, dld); + if (!instance) { + LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); + return {}; + } + if (!vk::Load(*instance, dld)) { + LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); + } + return std::make_pair(std::move(instance), version); +} + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h new file mode 100644 index 000000000..ff2be0a48 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_instance.h @@ -0,0 +1,21 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" +#include "common/dynamic_library.h" +#include "core/frontend/emu_window.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +[[nodiscard]] std::pair CreateInstance( + Common::DynamicLibrary& library, vk::InstanceDispatch& dld, + Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless, + bool enable_debug_utils = false, bool enable_layers = false); + +} // namespace Vulkan -- cgit v1.2.3 From 47843b4f097ced5e99c5567b8ac3fd53b80fab0a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Dec 2020 02:01:13 -0300 Subject: renderer_vulkan: Create debug callback on separate file and throw Initialize debug callbacks (messenger) from a separate file. This allows sharing code with different backends. Change our Vulkan error handling to use exceptions instead of error codes, simplifying the initialization process. 
--- src/video_core/CMakeLists.txt | 2 + src/video_core/renderer_vulkan/renderer_vulkan.cpp | 37 +++--------------- src/video_core/renderer_vulkan/renderer_vulkan.h | 4 +- .../vulkan_common/vulkan_debug_callback.cpp | 45 ++++++++++++++++++++++ .../vulkan_common/vulkan_debug_callback.h | 11 ++++++ src/video_core/vulkan_common/vulkan_instance.cpp | 14 +++---- src/video_core/vulkan_common/vulkan_wrapper.cpp | 39 +++++-------------- src/video_core/vulkan_common/vulkan_wrapper.h | 15 +++++--- 8 files changed, 88 insertions(+), 79 deletions(-) create mode 100644 src/video_core/vulkan_common/vulkan_debug_callback.cpp create mode 100644 src/video_core/vulkan_common/vulkan_debug_callback.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f8c36947a..9287faee1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -258,6 +258,8 @@ add_library(video_core STATIC textures/texture.h video_core.cpp video_core.h + vulkan_common/vulkan_debug_callback.cpp + vulkan_common/vulkan_debug_callback.h vulkan_common/vulkan_instance.cpp vulkan_common/vulkan_instance.h vulkan_common/vulkan_library.cpp diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 82619bc61..8e01dc191 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -29,6 +29,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/vulkan_common/vulkan_debug_callback.h" #include "video_core/vulkan_common/vulkan_instance.h" #include "video_core/vulkan_common/vulkan_library.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -48,24 +49,6 @@ namespace Vulkan { namespace { -VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, - 
VkDebugUtilsMessageTypeFlagsEXT type, - const VkDebugUtilsMessengerCallbackDataEXT* data, - [[maybe_unused]] void* user_data) { - const char* const message{data->pMessage}; - - if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { - LOG_CRITICAL(Render_Vulkan, "{}", message); - } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) { - LOG_WARNING(Render_Vulkan, "{}", message); - } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) { - LOG_INFO(Render_Vulkan, "{}", message); - } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) { - LOG_DEBUG(Render_Vulkan, "{}", message); - } - return VK_FALSE; -} - std::string GetReadableVersion(u32 version) { return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), VK_VERSION_PATCH(version)); @@ -158,7 +141,11 @@ bool RendererVulkan::Init() { library = OpenLibrary(); std::tie(instance, instance_version) = CreateInstance( library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug); - if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { + if (Settings::values.renderer_debug) { + debug_callback = CreateDebugCallback(instance); + } + + if (!CreateSurface() || !PickDevices()) { return false; } @@ -201,18 +188,6 @@ void RendererVulkan::ShutDown() { device.reset(); } -bool RendererVulkan::CreateDebugCallback() { - if (!Settings::values.renderer_debug) { - return true; - } - debug_callback = instance.TryCreateDebugCallback(DebugCallback); - if (!debug_callback) { - LOG_ERROR(Render_Vulkan, "Failed to create debug callback"); - return false; - } - return true; -} - bool RendererVulkan::CreateSurface() { [[maybe_unused]] const auto& window_info = render_window.GetWindowInfo(); VkSurfaceKHR unsafe_surface = nullptr; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 4a0abfaad..fa7628d0e 100644 --- 
a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -56,8 +56,6 @@ public: static std::vector EnumerateDevices(); private: - bool CreateDebugCallback(); - bool CreateSurface(); bool PickDevices(); @@ -78,7 +76,7 @@ private: VKScreenInfo screen_info; - vk::DebugCallback debug_callback; + vk::DebugUtilsMessenger debug_callback; std::unique_ptr device; std::unique_ptr memory_manager; std::unique_ptr state_tracker; diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp new file mode 100644 index 000000000..ea7af8ad4 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp @@ -0,0 +1,45 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include "common/logging/log.h" +#include "video_core/vulkan_common/vulkan_debug_callback.h" + +namespace Vulkan { +namespace { +VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, + VkDebugUtilsMessageTypeFlagsEXT type, + const VkDebugUtilsMessengerCallbackDataEXT* data, + [[maybe_unused]] void* user_data) { + const std::string_view message{data->pMessage}; + if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { + LOG_CRITICAL(Render_Vulkan, "{}", message); + } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) { + LOG_WARNING(Render_Vulkan, "{}", message); + } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) { + LOG_INFO(Render_Vulkan, "{}", message); + } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) { + LOG_DEBUG(Render_Vulkan, "{}", message); + } + return VK_FALSE; +} +} // Anonymous namespace + +vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance) { + return instance.CreateDebugUtilsMessenger(VkDebugUtilsMessengerCreateInfoEXT{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, 
+ .pNext = nullptr, + .flags = 0, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + .pfnUserCallback = Callback, + }); +} + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.h b/src/video_core/vulkan_common/vulkan_debug_callback.h new file mode 100644 index 000000000..2efcd244c --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_debug_callback.h @@ -0,0 +1,11 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance); + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp index c19f93e0a..d3d8630e5 100644 --- a/src/video_core/vulkan_common/vulkan_instance.cpp +++ b/src/video_core/vulkan_common/vulkan_instance.cpp @@ -117,21 +117,20 @@ std::pair CreateInstance(Common::DynamicLibrary& library, bool enable_debug_utils, bool enable_layers) { if (!library.IsOpen()) { LOG_ERROR(Render_Vulkan, "Vulkan library not available"); - return {}; + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); } if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) { LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan"); - return {}; + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); } if (!vk::Load(dld)) { LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); - return {}; + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); } const std::vector 
extensions = RequiredExtensions(window_type, enable_debug_utils); if (!AreExtensionsSupported(dld, extensions)) { - return {}; + throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); } - std::vector layers = Layers(enable_layers); RemoveUnavailableLayers(dld, layers); @@ -139,12 +138,9 @@ std::pair CreateInstance(Common::DynamicLibrary& library, const u32 version = std::min(vk::AvailableVersion(dld), VK_API_VERSION_1_1); vk::Instance instance = vk::Instance::Create(version, layers, extensions, dld); - if (!instance) { - LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); - return {}; - } if (!vk::Load(*instance, dld)) { LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); } return std::make_pair(std::move(instance), version); } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 478402bbd..f4177537b 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -435,7 +435,7 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span buffe } Instance Instance::Create(u32 version, Span layers, Span extensions, - InstanceDispatch& dispatch) noexcept { + InstanceDispatch& dispatch) { const VkApplicationInfo application_info{ .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .pNext = nullptr, @@ -455,22 +455,17 @@ Instance Instance::Create(u32 version, Span layers, Span> Instance::EnumeratePhysicalDevices() { +std::optional> Instance::EnumeratePhysicalDevices() const { u32 num; if (dld->vkEnumeratePhysicalDevices(handle, &num, nullptr) != VK_SUCCESS) { return std::nullopt; @@ -483,27 +478,11 @@ std::optional> Instance::EnumeratePhysicalDevices( return std::make_optional(std::move(physical_devices)); } -DebugCallback Instance::TryCreateDebugCallback( - PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept { - const VkDebugUtilsMessengerCreateInfoEXT ci{ - .sType = 
VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, - .pNext = nullptr, - .flags = 0, - .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, - .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT, - .pfnUserCallback = callback, - .pUserData = nullptr, - }; - - VkDebugUtilsMessengerEXT messenger; - if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) { - return {}; - } - return DebugCallback(messenger, handle, *dld); +DebugUtilsMessenger Instance::CreateDebugUtilsMessenger( + const VkDebugUtilsMessengerCreateInfoEXT& create_info) const { + VkDebugUtilsMessengerEXT object; + Check(dld->vkCreateDebugUtilsMessengerEXT(handle, &create_info, nullptr, &object)); + return DebugUtilsMessenger(object, handle, *dld); } void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index f9a184e00..03ca97ac0 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -555,7 +555,7 @@ private: const DeviceDispatch* dld = nullptr; }; -using DebugCallback = Handle; +using DebugUtilsMessenger = Handle; using DescriptorSetLayout = Handle; using DescriptorUpdateTemplateKHR = Handle; using Pipeline = Handle; @@ -573,16 +573,19 @@ class Instance : public Handle { using Handle::Handle; public: - /// Creates a Vulkan instance. Use "operator bool" for error handling. + /// Creates a Vulkan instance. + /// @throw Exception on initialization error. static Instance Create(u32 version, Span layers, Span extensions, - InstanceDispatch& dispatch) noexcept; + InstanceDispatch& dispatch); /// Enumerates physical devices. 
/// @return Physical devices and an empty handle on failure. - std::optional> EnumeratePhysicalDevices(); + std::optional> EnumeratePhysicalDevices() const; - /// Tries to create a debug callback messenger. Returns an empty handle on failure. - DebugCallback TryCreateDebugCallback(PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept; + /// Creates a debug callback messenger. + /// @throw Exception on creation failure. + DebugUtilsMessenger CreateDebugUtilsMessenger( + const VkDebugUtilsMessengerCreateInfoEXT& create_info) const; }; class Queue { -- cgit v1.2.3 From dce8720780d7fbbe4741a68ec11232d6ea304b06 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Dec 2020 02:04:31 -0300 Subject: renderer_vulkan: Catch and report exceptions Move more Vulkan code to report errors with exceptions and report them through a log before notifying it with an error boolean for backwards compatibility. In the future we can replace the rasterizer two-step initialization to always use exceptions. 
--- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 8e01dc191..ccdc86ed7 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -137,7 +137,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.OnFrameDisplayed(); } -bool RendererVulkan::Init() { +bool RendererVulkan::Init() try { library = OpenLibrary(); std::tie(instance, instance_version) = CreateInstance( library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug); @@ -168,8 +168,11 @@ bool RendererVulkan::Init() { blit_screen = std::make_unique(cpu_memory, render_window, *rasterizer, *device, *memory_manager, *swapchain, *scheduler, screen_info); - return true; + +} catch (const vk::Exception& exception) { + LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); + return false; } void RendererVulkan::ShutDown() { -- cgit v1.2.3 From 11f0f7598df993c717752030c05f7b1eca3c762c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Dec 2020 02:14:15 -0300 Subject: renderer_vulkan: Initialize surface in separate file Move surface initialization code to a separate file. It's unlikely to use this code outside of Vulkan, but keeping platform-specific code (Win32, Xlib, Wayland) in its own translation unit keeps things cleaner. 
--- src/video_core/CMakeLists.txt | 2 + src/video_core/renderer_vulkan/renderer_vulkan.cpp | 74 +------------------- src/video_core/renderer_vulkan/renderer_vulkan.h | 2 - src/video_core/vulkan_common/vulkan_surface.cpp | 81 ++++++++++++++++++++++ src/video_core/vulkan_common/vulkan_surface.h | 18 +++++ src/video_core/vulkan_common/vulkan_wrapper.h | 5 ++ 6 files changed, 109 insertions(+), 73 deletions(-) create mode 100644 src/video_core/vulkan_common/vulkan_surface.cpp create mode 100644 src/video_core/vulkan_common/vulkan_surface.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 9287faee1..f977cf12b 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -264,6 +264,8 @@ add_library(video_core STATIC vulkan_common/vulkan_instance.h vulkan_common/vulkan_library.cpp vulkan_common/vulkan_library.h + vulkan_common/vulkan_surface.cpp + vulkan_common/vulkan_surface.h vulkan_common/vulkan_wrapper.cpp vulkan_common/vulkan_wrapper.h ) diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index ccdc86ed7..831c204c2 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -32,21 +32,9 @@ #include "video_core/vulkan_common/vulkan_debug_callback.h" #include "video_core/vulkan_common/vulkan_instance.h" #include "video_core/vulkan_common/vulkan_library.h" +#include "video_core/vulkan_common/vulkan_surface.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -// Include these late to avoid polluting previous headers -#ifdef _WIN32 -#include -// ensure include order -#include -#endif - -#if !defined(_WIN32) && !defined(__APPLE__) -#include -#include -#include -#endif - namespace Vulkan { namespace { std::string GetReadableVersion(u32 version) { @@ -144,8 +132,8 @@ bool RendererVulkan::Init() try { if (Settings::values.renderer_debug) { debug_callback = 
CreateDebugCallback(instance); } - - if (!CreateSurface() || !PickDevices()) { + surface = CreateSurface(instance, render_window); + if (!PickDevices()) { return false; } @@ -191,62 +179,6 @@ void RendererVulkan::ShutDown() { device.reset(); } -bool RendererVulkan::CreateSurface() { - [[maybe_unused]] const auto& window_info = render_window.GetWindowInfo(); - VkSurfaceKHR unsafe_surface = nullptr; - -#ifdef _WIN32 - if (window_info.type == Core::Frontend::WindowSystemType::Windows) { - const HWND hWnd = static_cast(window_info.render_surface); - const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, - nullptr, 0, nullptr, hWnd}; - const auto vkCreateWin32SurfaceKHR = reinterpret_cast( - dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR")); - if (!vkCreateWin32SurfaceKHR || - vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { - LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface"); - return false; - } - } -#endif -#if !defined(_WIN32) && !defined(__APPLE__) - if (window_info.type == Core::Frontend::WindowSystemType::X11) { - const VkXlibSurfaceCreateInfoKHR xlib_ci{ - VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, - static_cast(window_info.display_connection), - reinterpret_cast(window_info.render_surface)}; - const auto vkCreateXlibSurfaceKHR = reinterpret_cast( - dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR")); - if (!vkCreateXlibSurfaceKHR || - vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { - LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); - return false; - } - } - if (window_info.type == Core::Frontend::WindowSystemType::Wayland) { - const VkWaylandSurfaceCreateInfoKHR wayland_ci{ - VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0, - static_cast(window_info.display_connection), - static_cast(window_info.render_surface)}; - const auto vkCreateWaylandSurfaceKHR = 
reinterpret_cast( - dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR")); - if (!vkCreateWaylandSurfaceKHR || - vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) != - VK_SUCCESS) { - LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); - return false; - } - } -#endif - if (!unsafe_surface) { - LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); - return false; - } - - surface = vk::SurfaceKHR(unsafe_surface, *instance, dld); - return true; -} - bool RendererVulkan::PickDevices() { const auto devices = instance.EnumeratePhysicalDevices(); if (!devices) { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index fa7628d0e..7c5ce1da4 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -56,8 +56,6 @@ public: static std::vector EnumerateDevices(); private: - bool CreateSurface(); - bool PickDevices(); void Report() const; diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp new file mode 100644 index 000000000..3c3238f96 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_surface.cpp @@ -0,0 +1,81 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/logging/log.h" +#include "core/frontend/emu_window.h" +#include "video_core/vulkan_common/vulkan_surface.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +// Include these late to avoid polluting previous headers +#ifdef _WIN32 +#include +// ensure include order +#include +#endif + +#if !defined(_WIN32) && !defined(__APPLE__) +#include +#include +#include +#endif + +namespace Vulkan { + +vk::SurfaceKHR CreateSurface(const vk::Instance& instance, + const Core::Frontend::EmuWindow& emu_window) { + [[maybe_unused]] const vk::InstanceDispatch& dld = instance.Dispatch(); + [[maybe_unused]] const auto& window_info = emu_window.GetWindowInfo(); + VkSurfaceKHR unsafe_surface = nullptr; + +#ifdef _WIN32 + if (window_info.type == Core::Frontend::WindowSystemType::Windows) { + const HWND hWnd = static_cast(window_info.render_surface); + const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, + nullptr, 0, nullptr, hWnd}; + const auto vkCreateWin32SurfaceKHR = reinterpret_cast( + dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR")); + if (!vkCreateWin32SurfaceKHR || + vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + } +#endif +#if !defined(_WIN32) && !defined(__APPLE__) + if (window_info.type == Core::Frontend::WindowSystemType::X11) { + const VkXlibSurfaceCreateInfoKHR xlib_ci{ + VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, + static_cast(window_info.display_connection), + reinterpret_cast(window_info.render_surface)}; + const auto vkCreateXlibSurfaceKHR = reinterpret_cast( + dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR")); + if (!vkCreateXlibSurfaceKHR || + vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib 
surface"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + } + if (window_info.type == Core::Frontend::WindowSystemType::Wayland) { + const VkWaylandSurfaceCreateInfoKHR wayland_ci{ + VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0, + static_cast(window_info.display_connection), + static_cast(window_info.render_surface)}; + const auto vkCreateWaylandSurfaceKHR = reinterpret_cast( + dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR")); + if (!vkCreateWaylandSurfaceKHR || + vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) != + VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + } +#endif + if (!unsafe_surface) { + LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + return vk::SurfaceKHR(unsafe_surface, *instance, dld); +} + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_surface.h b/src/video_core/vulkan_common/vulkan_surface.h new file mode 100644 index 000000000..05a169e32 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_surface.h @@ -0,0 +1,18 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Core::Frontend { +class EmuWindow; +} + +namespace Vulkan { + +[[nodiscard]] vk::SurfaceKHR CreateSurface(const vk::Instance& instance, + const Core::Frontend::EmuWindow& emu_window); + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 03ca97ac0..012982a3f 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -586,6 +586,11 @@ public: /// @throw Exception on creation failure. 
DebugUtilsMessenger CreateDebugUtilsMessenger( const VkDebugUtilsMessengerCreateInfoEXT& create_info) const; + + /// Returns dispatch table. + const InstanceDispatch& Dispatch() const noexcept { + return *dld; + } }; class Queue { -- cgit v1.2.3 From 085adfea00a525796a3bf4b2dd345e1df656c930 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Dec 2020 02:27:57 -0300 Subject: renderer_vulkan: Throw when enumerating devices fails Report device enumeration errors with exceptions to be consistent with other initialization related function calls. Reduces the amount of code to maintain. --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 35 ++++++++-------------- src/video_core/vulkan_common/vulkan_instance.cpp | 2 +- src/video_core/vulkan_common/vulkan_instance.h | 2 +- src/video_core/vulkan_common/vulkan_wrapper.cpp | 12 +++----- src/video_core/vulkan_common/vulkan_wrapper.h | 3 +- 5 files changed, 21 insertions(+), 33 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 831c204c2..f64318f25 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -170,7 +170,6 @@ void RendererVulkan::ShutDown() { if (const auto& dev = device->GetLogical()) { dev.WaitIdle(); } - rasterizer.reset(); blit_screen.reset(); scheduler.reset(); @@ -180,19 +179,13 @@ void RendererVulkan::ShutDown() { } bool RendererVulkan::PickDevices() { - const auto devices = instance.EnumeratePhysicalDevices(); - if (!devices) { - LOG_ERROR(Render_Vulkan, "Failed to enumerate physical devices"); - return false; - } - + const std::vector devices = instance.EnumeratePhysicalDevices(); const s32 device_index = Settings::values.vulkan_device.GetValue(); - if (device_index < 0 || device_index >= static_cast(devices->size())) { + if (device_index < 0 || device_index >= static_cast(devices.size())) { LOG_ERROR(Render_Vulkan, 
"Invalid device index {}!", device_index); return false; } - const vk::PhysicalDevice physical_device((*devices)[static_cast(device_index)], - dld); + const vk::PhysicalDevice physical_device(devices[static_cast(device_index)], dld); if (!VKDevice::IsSuitable(physical_device, *surface)) { return false; } @@ -224,23 +217,21 @@ void RendererVulkan::Report() const { telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); } -std::vector RendererVulkan::EnumerateDevices() { +std::vector RendererVulkan::EnumerateDevices() try { vk::InstanceDispatch dld; - Common::DynamicLibrary library = OpenLibrary(); - vk::Instance instance = CreateInstance(library, dld).first; - if (!instance) { - return {}; - } - const std::optional physical_devices = instance.EnumeratePhysicalDevices(); - if (!physical_devices) { - return {}; - } + const Common::DynamicLibrary library = OpenLibrary(); + const vk::Instance instance = CreateInstance(library, dld).first; + const std::vector physical_devices = instance.EnumeratePhysicalDevices(); std::vector names; - names.reserve(physical_devices->size()); - for (const auto& device : *physical_devices) { + names.reserve(physical_devices.size()); + for (const VkPhysicalDevice device : physical_devices) { names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName); } return names; + +} catch (const vk::Exception& exception) { + LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what()); + return {}; } } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp index d3d8630e5..ee46fc6cc 100644 --- a/src/video_core/vulkan_common/vulkan_instance.cpp +++ b/src/video_core/vulkan_common/vulkan_instance.cpp @@ -111,7 +111,7 @@ void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector CreateInstance(Common::DynamicLibrary& library, +std::pair CreateInstance(const Common::DynamicLibrary& library, 
vk::InstanceDispatch& dld, Core::Frontend::WindowSystemType window_type, bool enable_debug_utils, bool enable_layers) { diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h index ff2be0a48..5acca9756 100644 --- a/src/video_core/vulkan_common/vulkan_instance.h +++ b/src/video_core/vulkan_common/vulkan_instance.h @@ -14,7 +14,7 @@ namespace Vulkan { [[nodiscard]] std::pair CreateInstance( - Common::DynamicLibrary& library, vk::InstanceDispatch& dld, + const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless, bool enable_debug_utils = false, bool enable_layers = false); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index f4177537b..8698c3f92 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -465,17 +465,13 @@ Instance Instance::Create(u32 version, Span layers, Span> Instance::EnumeratePhysicalDevices() const { +std::vector Instance::EnumeratePhysicalDevices() const { u32 num; - if (dld->vkEnumeratePhysicalDevices(handle, &num, nullptr) != VK_SUCCESS) { - return std::nullopt; - } + Check(dld->vkEnumeratePhysicalDevices(handle, &num, nullptr)); std::vector physical_devices(num); - if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) { - return std::nullopt; - } + Check(dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data())); SortPhysicalDevices(physical_devices, *dld); - return std::make_optional(std::move(physical_devices)); + return physical_devices; } DebugUtilsMessenger Instance::CreateDebugUtilsMessenger( diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 012982a3f..af3083c84 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ 
b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -580,7 +580,8 @@ public: /// Enumerates physical devices. /// @return Physical devices and an empty handle on failure. - std::optional> EnumeratePhysicalDevices() const; + /// @throw Exception on Vulkan error. + std::vector EnumeratePhysicalDevices() const; /// Creates a debug callback messenger. /// @throw Exception on creation failure. -- cgit v1.2.3 From 53ea06dc17ccf9232aa3326a4621500058f9d253 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Dec 2020 02:42:03 -0300 Subject: renderer_vulkan: Remove two step initialization on VKDevice The Vulkan device abstraction either initializes successfully on the constructor or throws a Vulkan exception. --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 15 ++++----------- src/video_core/renderer_vulkan/renderer_vulkan.h | 2 +- src/video_core/renderer_vulkan/vk_device.cpp | 12 ++---------- src/video_core/renderer_vulkan/vk_device.h | 3 --- src/video_core/vulkan_common/vulkan_wrapper.cpp | 7 ++----- src/video_core/vulkan_common/vulkan_wrapper.h | 2 +- 6 files changed, 10 insertions(+), 31 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index f64318f25..fdce11b06 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -133,10 +133,8 @@ bool RendererVulkan::Init() try { debug_callback = CreateDebugCallback(instance); } surface = CreateSurface(instance, render_window); - if (!PickDevices()) { - return false; - } + InitializeDevice(); Report(); memory_manager = std::make_unique(*device); @@ -178,21 +176,16 @@ void RendererVulkan::ShutDown() { device.reset(); } -bool RendererVulkan::PickDevices() { +void RendererVulkan::InitializeDevice() { const std::vector devices = instance.EnumeratePhysicalDevices(); const s32 device_index = Settings::values.vulkan_device.GetValue(); if (device_index 
< 0 || device_index >= static_cast(devices.size())) { LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); - return false; - } - const vk::PhysicalDevice physical_device(devices[static_cast(device_index)], dld); - if (!VKDevice::IsSuitable(physical_device, *surface)) { - return false; + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); } - + const vk::PhysicalDevice physical_device(devices[static_cast(device_index)], dld); device = std::make_unique(*instance, instance_version, physical_device, *surface, dld); - return device->Create(); } void RendererVulkan::Report() const { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 7c5ce1da4..a05b3bd38 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -56,7 +56,7 @@ public: static std::vector EnumerateDevices(); private: - bool PickDevices(); + void InitializeDevice(); void Report() const; diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index f3dd6eae1..831603e87 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -212,11 +212,7 @@ VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevi instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} { SetupFamilies(surface); SetupFeatures(); -} - -VKDevice::~VKDevice() = default; -bool VKDevice::Create() { const auto queue_cis = GetDeviceQueueCreateInfos(); const std::vector extensions = LoadExtensions(); @@ -426,12 +422,7 @@ bool VKDevice::Create() { }; first_next = &diagnostics_nv; } - logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); - if (!logical) { - LOG_ERROR(Render_Vulkan, "Failed to create logical device"); - return false; - } CollectTelemetryParameters(); CollectToolingInfo(); @@ -455,9 +446,10 @@ bool 
VKDevice::Create() { present_queue = logical.GetQueue(present_family); use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); - return true; } +VKDevice::~VKDevice() = default; + VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const { if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 9673f47c7..67617f86d 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -28,9 +28,6 @@ public: VkSurfaceKHR surface, const vk::InstanceDispatch& dld); ~VKDevice(); - /// Initializes the device. Returns true on success. - bool Create(); - /** * Returns a format supported by the device for the passed requeriments. * @param wanted_format The ideal format to be returned. It may not be the returned format. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 8698c3f92..5e15ad607 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -580,7 +580,7 @@ void Semaphore::SetObjectNameEXT(const char* name) const { Device Device::Create(VkPhysicalDevice physical_device, Span queues_ci, Span enabled_extensions, const void* next, - DeviceDispatch& dispatch) noexcept { + DeviceDispatch& dispatch) { const VkDeviceCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pNext = next, @@ -593,11 +593,8 @@ Device Device::Create(VkPhysicalDevice physical_device, Span { public: static Device Create(VkPhysicalDevice physical_device, Span queues_ci, Span enabled_extensions, const void* next, - DeviceDispatch& dispatch) noexcept; + DeviceDispatch& dispatch); Queue GetQueue(u32 family_index) const noexcept; -- cgit v1.2.3 From f687392e6f29d1b65cd80a18f86a55122bde417c Mon Sep 17 00:00:00 2001 From: 
ReinUsesLisp Date: Sat, 26 Dec 2020 01:50:25 -0300 Subject: vk_device: Stop initialization when device is not suitable VKDevice::IsSuitable was not being called. To address this issue, check suitability before initialization and throw an exception if it fails. By doing this, we can deduplicate some code on queue searches. Previously we would first search if a present and graphics queue existed, then on initialization we would search again to find the index. --- src/video_core/renderer_vulkan/vk_device.cpp | 96 +++++++++++----------------- src/video_core/renderer_vulkan/vk_device.h | 4 +- 2 files changed, 39 insertions(+), 61 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 831603e87..07edf556d 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -210,6 +210,7 @@ VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevi VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} { + CheckSuitability(); SetupFamilies(surface); SetupFeatures(); @@ -548,64 +549,41 @@ bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wa return (supported_usage & wanted_usage) == wanted_usage; } -bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { - bool is_suitable = true; +void VKDevice::CheckSuitability() const { std::bitset available_extensions; - - for (const auto& prop : physical.EnumerateDeviceExtensionProperties()) { + for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) { for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { if (available_extensions[i]) { continue; } - const std::string_view name{prop.extensionName}; 
+ const std::string_view name{property.extensionName}; available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; } } - if (!available_extensions.all()) { - for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { - if (available_extensions[i]) { - continue; - } - LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); - is_suitable = false; - } - } - - bool has_graphics{}, has_present{}; - const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); - for (u32 i = 0; i < static_cast(queue_family_properties.size()); ++i) { - const auto& family = queue_family_properties[i]; - if (family.queueCount == 0) { + for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { + if (available_extensions[i]) { continue; } - has_graphics |= family.queueFlags & VK_QUEUE_GRAPHICS_BIT; - has_present |= physical.GetSurfaceSupportKHR(i, surface); + LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); + throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); } - if (!has_graphics || !has_present) { - LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue"); - is_suitable = false; - } - // TODO(Rodrigo): Check if the device matches all requeriments. 
- const auto properties{physical.GetProperties()}; - const auto& limits{properties.limits}; + const VkPhysicalDeviceLimits& limits{properties.limits}; constexpr u32 required_ubo_size = 65536; if (limits.maxUniformBufferRange < required_ubo_size) { LOG_ERROR(Render_Vulkan, "Device UBO size {} is too small, {} is required", limits.maxUniformBufferRange, required_ubo_size); - is_suitable = false; + throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } - constexpr u32 required_num_viewports = 16; if (limits.maxViewports < required_num_viewports) { LOG_INFO(Render_Vulkan, "Device number of viewports {} is too small, {} is required", limits.maxViewports, required_num_viewports); - is_suitable = false; + throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } - - const auto features{physical.GetFeatures()}; - const std::array feature_report = { + const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + const std::array feature_report{ std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), std::make_pair(features.imageCubeArray, "imageCubeArray"), std::make_pair(features.independentBlend, "independentBlend"), @@ -623,19 +601,13 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), }; - for (const auto& [supported, name] : feature_report) { - if (supported) { + for (const auto& [is_supported, name] : feature_report) { + if (is_supported) { continue; } LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name); - is_suitable = false; - } - - if (!is_suitable) { - LOG_ERROR(Render_Vulkan, "{} is not suitable", properties.deviceName); + throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } - - return is_suitable; } std::vector VKDevice::LoadExtensions() { @@ -794,28 +766,34 @@ std::vector VKDevice::LoadExtensions() { } void VKDevice::SetupFamilies(VkSurfaceKHR surface) { - std::optional graphics_family_, 
present_family_; - const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); - for (u32 i = 0; i < static_cast(queue_family_properties.size()); ++i) { - if (graphics_family_ && present_family_) + std::optional graphics; + std::optional present; + for (u32 index = 0; index < static_cast(queue_family_properties.size()); ++index) { + if (graphics && present) { break; - - const auto& queue_family = queue_family_properties[i]; - if (queue_family.queueCount == 0) + } + const VkQueueFamilyProperties& queue_family = queue_family_properties[index]; + if (queue_family.queueCount == 0) { continue; - + } if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) { - graphics_family_ = i; + graphics = index; } - if (physical.GetSurfaceSupportKHR(i, surface)) { - present_family_ = i; + if (physical.GetSurfaceSupportKHR(index, surface)) { + present = index; } } - ASSERT(graphics_family_ && present_family_); - - graphics_family = *graphics_family_; - present_family = *present_family_; + if (!graphics) { + LOG_ERROR(Render_Vulkan, "Device lacks a graphics queue"); + throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); + } + if (!present) { + LOG_ERROR(Render_Vulkan, "Device lacks a present queue"); + throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); + } + graphics_family = *graphics; + present_family = *present; } void VKDevice::SetupFeatures() { diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 67617f86d..63e123d02 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -229,10 +229,10 @@ public: return use_asynchronous_shaders; } +private: /// Checks if the physical device is suitable. - static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); + void CheckSuitability() const; -private: /// Loads extensions into a vector and stores available ones in this object. 
std::vector LoadExtensions(); -- cgit v1.2.3 From 7344a7c447af2591c393e69d39892dab217196d3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Dec 2020 02:09:55 -0300 Subject: vk_device: Use an array to report lacking device limits This makes it easier to add and tune the required device limits. --- src/video_core/renderer_vulkan/vk_device.cpp | 30 ++++++++++++++++------------ 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 07edf556d..024d5c2de 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -567,20 +567,24 @@ void VKDevice::CheckSuitability() const { LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); } - // TODO(Rodrigo): Check if the device matches all requeriments. + struct LimitTuple { + u32 minimum; + u32 value; + const char* name; + }; const VkPhysicalDeviceLimits& limits{properties.limits}; - - constexpr u32 required_ubo_size = 65536; - if (limits.maxUniformBufferRange < required_ubo_size) { - LOG_ERROR(Render_Vulkan, "Device UBO size {} is too small, {} is required", - limits.maxUniformBufferRange, required_ubo_size); - throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); - } - constexpr u32 required_num_viewports = 16; - if (limits.maxViewports < required_num_viewports) { - LOG_INFO(Render_Vulkan, "Device number of viewports {} is too small, {} is required", - limits.maxViewports, required_num_viewports); - throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); + const std::array limits_report{ + LimitTuple{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"}, + LimitTuple{16, limits.maxViewports, "maxViewports"}, + LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"}, + LimitTuple{8, limits.maxClipDistances, "maxClipDistances"}, + }; + for (const 
auto& tuple : limits_report) { + if (tuple.value < tuple.minimum) { + LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", tuple.name, + tuple.minimum, tuple.value); + throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); + } } const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; const std::array feature_report{ -- cgit v1.2.3 From cdbee27692d73046cecf56fdea1c90f72ebbc0ce Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 04:58:38 -0300 Subject: vulkan_instance: Allow different Vulkan versions and enforce 1.1 For listing the available physical devices we can use Vulkan 1.0. Now that MoltenVK supports 1.1 we can require it for running games. Add missing documentation. --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 9 ++++----- src/video_core/renderer_vulkan/renderer_vulkan.h | 1 - src/video_core/renderer_vulkan/vk_device.cpp | 10 ++++------ src/video_core/renderer_vulkan/vk_device.h | 9 ++------- .../renderer_vulkan/vk_shader_decompiler.cpp | 11 ++--------- src/video_core/vulkan_common/vulkan_instance.cpp | 21 ++++++++++++--------- src/video_core/vulkan_common/vulkan_instance.h | 19 +++++++++++++++---- 7 files changed, 39 insertions(+), 41 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index fdce11b06..5b35cb407 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -127,8 +127,8 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { bool RendererVulkan::Init() try { library = OpenLibrary(); - std::tie(instance, instance_version) = CreateInstance( - library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug); + instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, + true, Settings::values.renderer_debug); if (Settings::values.renderer_debug) 
{ debug_callback = CreateDebugCallback(instance); } @@ -184,8 +184,7 @@ void RendererVulkan::InitializeDevice() { throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); } const vk::PhysicalDevice physical_device(devices[static_cast(device_index)], dld); - device = - std::make_unique(*instance, instance_version, physical_device, *surface, dld); + device = std::make_unique(*instance, physical_device, *surface, dld); } void RendererVulkan::Report() const { @@ -213,7 +212,7 @@ void RendererVulkan::Report() const { std::vector RendererVulkan::EnumerateDevices() try { vk::InstanceDispatch dld; const Common::DynamicLibrary library = OpenLibrary(); - const vk::Instance instance = CreateInstance(library, dld).first; + const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0); const std::vector physical_devices = instance.EnumeratePhysicalDevices(); std::vector names; names.reserve(physical_devices.size()); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index a05b3bd38..f22f50709 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -68,7 +68,6 @@ private: vk::InstanceDispatch dld; vk::Instance instance; - u32 instance_version{}; vk::SurfaceKHR surface; diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 024d5c2de..fd55ca8a8 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -206,10 +206,10 @@ std::unordered_map GetFormatProperties( } // Anonymous namespace -VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_, - VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) +VKDevice::VKDevice(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, + const vk::InstanceDispatch& dld_) : instance{instance_}, dld{dld_}, physical{physical_}, 
properties{physical.GetProperties()}, - instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} { + format_properties{GetFormatProperties(physical, dld)} { CheckSuitability(); SetupFamilies(surface); SetupFeatures(); @@ -653,9 +653,7 @@ std::vector VKDevice::LoadExtensions() { test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false); - if (instance_version >= VK_API_VERSION_1_1) { - test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); - } + test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 63e123d02..146acbe24 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -24,8 +24,8 @@ const u32 GuestWarpSize = 32; /// Handles data specific to a physical device. class VKDevice final { public: - explicit VKDevice(VkInstance instance, u32 instance_version, vk::PhysicalDevice physical, - VkSurfaceKHR surface, const vk::InstanceDispatch& dld); + explicit VKDevice(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, + const vk::InstanceDispatch& dld); ~VKDevice(); /** @@ -79,11 +79,6 @@ public: return present_family; } - /// Returns the current instance Vulkan API version in Vulkan-formatted version numbers. - u32 InstanceApiVersion() const { - return instance_version; - } - /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. 
u32 ApiVersion() const { return properties.apiVersion; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 09d6f9f35..571460c2f 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -272,19 +272,12 @@ bool IsPrecise(Operation operand) { return false; } -u32 ShaderVersion(const VKDevice& device) { - if (device.InstanceApiVersion() < VK_API_VERSION_1_1) { - return 0x00010000; - } - return 0x00010300; -} - class SPIRVDecompiler final : public Sirit::Module { public: explicit SPIRVDecompiler(const VKDevice& device_, const ShaderIR& ir_, ShaderType stage_, const Registry& registry_, const Specialization& specialization_) - : Module(ShaderVersion(device_)), device{device_}, ir{ir_}, stage{stage_}, - header{ir_.GetHeader()}, registry{registry_}, specialization{specialization_} { + : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()}, + registry{registry_}, specialization{specialization_} { if (stage_ != ShaderType::Compute) { transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo()); } diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp index ee46fc6cc..889ecda0c 100644 --- a/src/video_core/vulkan_common/vulkan_instance.cpp +++ b/src/video_core/vulkan_common/vulkan_instance.cpp @@ -111,10 +111,9 @@ void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector CreateInstance(const Common::DynamicLibrary& library, - vk::InstanceDispatch& dld, - Core::Frontend::WindowSystemType window_type, - bool enable_debug_utils, bool enable_layers) { +vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, + u32 required_version, Core::Frontend::WindowSystemType window_type, + bool enable_debug_utils, bool enable_layers) { if (!library.IsOpen()) { LOG_ERROR(Render_Vulkan, 
"Vulkan library not available"); throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); @@ -134,15 +133,19 @@ std::pair CreateInstance(const Common::DynamicLibrary& librar std::vector layers = Layers(enable_layers); RemoveUnavailableLayers(dld, layers); - // Limit the maximum version of Vulkan to avoid using untested version. - const u32 version = std::min(vk::AvailableVersion(dld), VK_API_VERSION_1_1); - - vk::Instance instance = vk::Instance::Create(version, layers, extensions, dld); + const u32 available_version = vk::AvailableVersion(dld); + if (available_version < required_version) { + LOG_ERROR(Render_Vulkan, "Vulkan {}.{} is not supported, {}.{} is required", + VK_VERSION_MAJOR(available_version), VK_VERSION_MINOR(available_version), + VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version)); + throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); + } + vk::Instance instance = vk::Instance::Create(required_version, layers, extensions, dld); if (!vk::Load(*instance, dld)) { LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); } - return std::make_pair(std::move(instance), version); + return instance; } } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h index 5acca9756..e5e3a7144 100644 --- a/src/video_core/vulkan_common/vulkan_instance.h +++ b/src/video_core/vulkan_common/vulkan_instance.h @@ -4,8 +4,6 @@ #pragma once -#include - #include "common/common_types.h" #include "common/dynamic_library.h" #include "core/frontend/emu_window.h" @@ -13,8 +11,21 @@ namespace Vulkan { -[[nodiscard]] std::pair CreateInstance( - const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, +/** + * Create a Vulkan instance + * + * @param library Dynamic library to load the Vulkan instance from + * @param dld Dispatch table to load function pointers into + * @param required_version Required Vulkan version 
(for example, VK_API_VERSION_1_1) + * @param window_type Window system type's enabled extension + * @param enable_debug_utils Whether to enable VK_EXT_debug_utils_extension_name or not + * @param enable_layers Whether to enable Vulkan validation layers or not + * + * @return A new Vulkan instance + * @throw vk::Exception on failure + */ +[[nodiscard]] vk::Instance CreateInstance( + const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version, Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless, bool enable_debug_utils = false, bool enable_layers = false); -- cgit v1.2.3 From a745d87971b2c9795e1b2c587bfe30b849b522fa Mon Sep 17 00:00:00 2001 From: Morph Date: Sat, 2 Jan 2021 09:00:05 -0500 Subject: general: Fix various spelling errors --- src/video_core/command_classes/vic.cpp | 2 +- src/video_core/renderer_vulkan/vk_device.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index aa8c9f9de..55e632346 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -53,7 +53,7 @@ void Vic::ProcessMethod(Method method, const std::vector& arguments) { void Vic::Execute() { if (output_surface_luma_address == 0) { - LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Recieved 0x{:X}", + LOG_ERROR(Service_NVDRV, "VIC Luma address not set. 
Received 0x{:X}", vic_state.output_surface.luma_offset); return; } diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 370a63f74..85b4f0dff 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -491,7 +491,7 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla } void VKDevice::ReportLoss() const { - LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); + LOG_CRITICAL(Render_Vulkan, "Device loss occurred!"); // Wait for the log to flush and for Nsight Aftermath to dump the results std::this_thread::sleep_for(std::chrono::seconds{15}); -- cgit v1.2.3 From 974d731926f2389cdce62155214610259845129f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Dec 2020 01:10:53 -0300 Subject: renderer_vulkan: Rename VKDevice to Device The "VK" prefix predates the "Vulkan" namespace. It was carried around the codebase for consistency. "VKDevice" currently is a bad alias with "VkDevice" (only an upcase character of difference) that can cause confusion. Rename all instances of it. 
--- src/video_core/renderer_vulkan/blit_image.cpp | 6 ++-- src/video_core/renderer_vulkan/blit_image.h | 11 +++---- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 8 ++--- src/video_core/renderer_vulkan/maxwell_to_vk.h | 8 ++--- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 4 +-- src/video_core/renderer_vulkan/renderer_vulkan.h | 4 +-- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 2 +- src/video_core/renderer_vulkan/vk_blit_screen.h | 6 ++-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 6 ++-- src/video_core/renderer_vulkan/vk_buffer_cache.h | 10 +++--- src/video_core/renderer_vulkan/vk_command_pool.cpp | 2 +- src/video_core/renderer_vulkan/vk_command_pool.h | 6 ++-- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 8 ++--- src/video_core/renderer_vulkan/vk_compute_pass.h | 10 +++--- .../renderer_vulkan/vk_compute_pipeline.cpp | 2 +- .../renderer_vulkan/vk_compute_pipeline.h | 6 ++-- .../renderer_vulkan/vk_descriptor_pool.cpp | 2 +- .../renderer_vulkan/vk_descriptor_pool.h | 6 ++-- src/video_core/renderer_vulkan/vk_device.cpp | 38 +++++++++++----------- src/video_core/renderer_vulkan/vk_device.h | 8 ++--- .../renderer_vulkan/vk_fence_manager.cpp | 6 ++-- src/video_core/renderer_vulkan/vk_fence_manager.h | 12 +++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.h | 6 ++-- .../renderer_vulkan/vk_master_semaphore.cpp | 2 +- .../renderer_vulkan/vk_master_semaphore.h | 4 +-- .../renderer_vulkan/vk_memory_manager.cpp | 8 ++--- src/video_core/renderer_vulkan/vk_memory_manager.h | 10 +++--- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 6 ++-- src/video_core/renderer_vulkan/vk_query_cache.cpp | 20 ++++++------ src/video_core/renderer_vulkan/vk_query_cache.h | 14 ++++---- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 8 ++--- src/video_core/renderer_vulkan/vk_rasterizer.h | 4 +-- src/video_core/renderer_vulkan/vk_scheduler.cpp | 2 +- 
src/video_core/renderer_vulkan/vk_scheduler.h | 6 ++-- .../renderer_vulkan/vk_shader_decompiler.cpp | 6 ++-- .../renderer_vulkan/vk_shader_decompiler.h | 6 ++-- src/video_core/renderer_vulkan/vk_shader_util.cpp | 2 +- src/video_core/renderer_vulkan/vk_shader_util.h | 4 +-- .../renderer_vulkan/vk_staging_buffer_pool.cpp | 2 +- .../renderer_vulkan/vk_staging_buffer_pool.h | 6 ++-- .../renderer_vulkan/vk_stream_buffer.cpp | 2 +- src/video_core/renderer_vulkan/vk_stream_buffer.h | 6 ++-- src/video_core/renderer_vulkan/vk_swapchain.cpp | 2 +- src/video_core/renderer_vulkan/vk_swapchain.h | 6 ++-- .../renderer_vulkan/vk_texture_cache.cpp | 8 ++--- src/video_core/renderer_vulkan/vk_texture_cache.h | 6 ++-- .../renderer_vulkan/vk_update_descriptor.cpp | 2 +- .../renderer_vulkan/vk_update_descriptor.h | 6 ++-- src/video_core/shader/async_shaders.cpp | 2 +- src/video_core/shader/async_shaders.h | 4 +-- 52 files changed, 166 insertions(+), 169 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 504492cac..b412cdb7f 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -225,7 +225,7 @@ constexpr std::array MakeStages( }; } -void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set, +void UpdateOneTextureDescriptorSet(const Device& device, VkDescriptorSet descriptor_set, VkSampler sampler, VkImageView image_view) { const VkDescriptorImageInfo image_info{ .sampler = sampler, @@ -247,7 +247,7 @@ void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descr device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr); } -void UpdateTwoTexturesDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set, +void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descriptor_set, VkSampler sampler, VkImageView image_view_0, VkImageView 
image_view_1) { const VkDescriptorImageInfo image_info_0{ @@ -326,7 +326,7 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, } // Anonymous namespace -BlitImageHelper::BlitImageHelper(const VKDevice& device_, VKScheduler& scheduler_, +BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 1a4f66336..43fd3d737 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -15,12 +15,11 @@ namespace Vulkan { using VideoCommon::Offset2D; -class VKDevice; -class VKScheduler; -class StateTracker; - +class Device; class Framebuffer; class ImageView; +class StateTracker; +class VKScheduler; struct BlitImagePipelineKey { constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default; @@ -31,7 +30,7 @@ struct BlitImagePipelineKey { class BlitImageHelper { public: - explicit BlitImageHelper(const VKDevice& device, VKScheduler& scheduler, + explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); ~BlitImageHelper(); @@ -67,7 +66,7 @@ private: void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); - const VKDevice& device; + const Device& device; VKScheduler& scheduler; StateTracker& state_tracker; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index ed4fce714..9c0fcfbce 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -47,7 +47,7 @@ VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter 
return {}; } -VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, +VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wrap_mode, Tegra::Texture::TextureFilter filter) { switch (wrap_mode) { case Tegra::Texture::WrapMode::Wrap: @@ -222,7 +222,7 @@ constexpr bool IsZetaFormat(PixelFormat pixel_format) { } // Anonymous namespace -FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format) { +FormatInfo SurfaceFormat(const Device& device, FormatType format_type, PixelFormat pixel_format) { ASSERT(static_cast(pixel_format) < std::size(tex_format_tuples)); auto tuple = tex_format_tuples[static_cast(pixel_format)]; @@ -280,7 +280,7 @@ VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { return {}; } -VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, +VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device, Maxwell::PrimitiveTopology topology) { switch (topology) { case Maxwell::PrimitiveTopology::Points: @@ -526,7 +526,7 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { return {}; } -VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) { +VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) { switch (index_format) { case Maxwell::IndexFormat::UnsignedByte: if (!device.IsExtIndexTypeUint8Supported()) { diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 8cf5aa711..8fa63c1c9 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -22,7 +22,7 @@ VkFilter Filter(Tegra::Texture::TextureFilter filter); VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); -VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, +VkSamplerAddressMode WrapMode(const Device& 
device, Tegra::Texture::WrapMode wrap_mode, Tegra::Texture::TextureFilter filter); VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); @@ -35,17 +35,17 @@ struct FormatInfo { bool storage; }; -FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format); +FormatInfo SurfaceFormat(const Device& device, FormatType format_type, PixelFormat pixel_format); VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); -VkPrimitiveTopology PrimitiveTopology(const VKDevice& device, Maxwell::PrimitiveTopology topology); +VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size); VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); -VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format); +VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format); VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 5b35cb407..0224fc445 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -42,7 +42,7 @@ std::string GetReadableVersion(u32 version) { VK_VERSION_PATCH(version)); } -std::string GetDriverVersion(const VKDevice& device) { +std::string GetDriverVersion(const Device& device) { // Extracted from // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314 const u32 version = device.GetDriverVersion(); @@ -184,7 +184,7 @@ void RendererVulkan::InitializeDevice() { throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); } const vk::PhysicalDevice physical_device(devices[static_cast(device_index)], dld); - device = std::make_unique(*instance, physical_device, *surface, 
dld); + device = std::make_unique(*instance, physical_device, *surface, dld); } void RendererVulkan::Report() const { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index f22f50709..5575ffc54 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -27,9 +27,9 @@ class GPU; namespace Vulkan { +class Device; class StateTracker; class VKBlitScreen; -class VKDevice; class VKMemoryManager; class VKSwapchain; class VKScheduler; @@ -74,7 +74,7 @@ private: VKScreenInfo screen_info; vk::DebugUtilsMessenger debug_callback; - std::unique_ptr device; + std::unique_ptr device; std::unique_ptr memory_manager; std::unique_ptr state_tracker; std::unique_ptr scheduler; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index a205cd151..a0f2825e2 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -114,7 +114,7 @@ struct VKBlitScreen::BufferData { VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, - VideoCore::RasterizerInterface& rasterizer_, const VKDevice& device_, + VideoCore::RasterizerInterface& rasterizer_, const Device& device_, VKMemoryManager& memory_manager_, VKSwapchain& swapchain_, VKScheduler& scheduler_, const VKScreenInfo& screen_info_) : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index cc56c4560..69ed61770 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -33,8 +33,8 @@ namespace Vulkan { struct ScreenInfo; +class Device; class RasterizerVulkan; -class VKDevice; class VKScheduler; class VKSwapchain; @@ -42,7 +42,7 @@ class VKBlitScreen final 
{ public: explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, - VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, + VideoCore::RasterizerInterface& rasterizer, const Device& device, VKMemoryManager& memory_manager, VKSwapchain& swapchain, VKScheduler& scheduler, const VKScreenInfo& screen_info); ~VKBlitScreen(); @@ -85,7 +85,7 @@ private: Core::Memory::Memory& cpu_memory; Core::Frontend::EmuWindow& render_window; VideoCore::RasterizerInterface& rasterizer; - const VKDevice& device; + const Device& device; VKMemoryManager& memory_manager; VKSwapchain& swapchain; VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 79131f819..f87779c4b 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -34,13 +34,13 @@ constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS = constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; -std::unique_ptr CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { +std::unique_ptr CreateStreamBuffer(const Device& device, VKScheduler& scheduler) { return std::make_unique(device, scheduler); } } // Anonymous namespace -Buffer::Buffer(const VKDevice& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_, +Buffer::Buffer(const Device& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ staging_pool_} { @@ -168,7 +168,7 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& 
cpu_memory_, - const VKDevice& device_, VKMemoryManager& memory_manager_, + const Device& device_, VKMemoryManager& memory_manager_, VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_, VKStagingBufferPool& staging_pool_) : VideoCommon::BufferCache{rasterizer_, gpu_memory_, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3ab77a00b..1c39aed34 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -15,13 +15,13 @@ namespace Vulkan { -class VKDevice; +class Device; class VKMemoryManager; class VKScheduler; class Buffer final : public VideoCommon::BufferBlock { public: - explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, + explicit Buffer(const Device& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_); ~Buffer(); @@ -41,7 +41,7 @@ public: } private: - const VKDevice& device; + const Device& device; VKScheduler& scheduler; VKStagingBufferPool& staging_pool; @@ -52,7 +52,7 @@ class VKBufferCache final : public VideoCommon::BufferCache CreateBlock(VAddr cpu_addr, std::size_t size) override; private: - const VKDevice& device; + const Device& device; VKMemoryManager& memory_manager; VKScheduler& scheduler; VKStagingBufferPool& staging_pool; diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp index ccae04929..ca512d667 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp @@ -17,7 +17,7 @@ struct CommandPool::Pool { vk::CommandBuffers cmdbufs; }; -CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_) +CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const Device& device_) : ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), 
device{device_} {} CommandPool::~CommandPool() = default; diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h index ce0e34515..61c26a22a 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.h +++ b/src/video_core/renderer_vulkan/vk_command_pool.h @@ -12,12 +12,12 @@ namespace Vulkan { +class Device; class MasterSemaphore; -class VKDevice; class CommandPool final : public ResourcePool { public: - explicit CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_); + explicit CommandPool(MasterSemaphore& master_semaphore_, const Device& device_); ~CommandPool() override; void Allocate(size_t begin, size_t end) override; @@ -27,7 +27,7 @@ public: private: struct Pool; - const VKDevice& device; + const Device& device; std::vector pools; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 5d4543bae..b13ed219a 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -86,7 +86,7 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { } // Anonymous namespace -VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, +VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, vk::Span bindings, vk::Span templates, vk::Span push_constants, @@ -162,7 +162,7 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( return set; } -QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, +QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) @@ -211,7 +211,7 @@ std::pair QuadArrayPass::Assemble(u32 num_vertices, u32 return {*buffer.handle, 0}; } -Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& 
scheduler_, +Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), @@ -255,7 +255,7 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff return {*buffer.handle, 0}; } -QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_, +QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 1b7502a4f..7ddb09afb 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -15,14 +15,14 @@ namespace Vulkan { -class VKDevice; +class Device; class VKScheduler; class VKStagingBufferPool; class VKUpdateDescriptorQueue; class VKComputePass { public: - explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, + explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, vk::Span bindings, vk::Span templates, vk::Span push_constants, std::span code); @@ -43,7 +43,7 @@ private: class QuadArrayPass final : public VKComputePass { public: - explicit QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, + explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); @@ -59,7 +59,7 @@ private: class Uint8Pass final : public VKComputePass { public: - explicit Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_, + explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& 
descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); @@ -75,7 +75,7 @@ private: class QuadIndexedPass final : public VKComputePass { public: - explicit QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_, + explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 9966dd14a..cd92d5dbe 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -15,7 +15,7 @@ namespace Vulkan { -VKComputePipeline::VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_, +VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, const SPIRVShader& shader_) diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index a7197536c..7e16575ac 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -11,13 +11,13 @@ namespace Vulkan { -class VKDevice; +class Device; class VKScheduler; class VKUpdateDescriptorQueue; class VKComputePipeline final { public: - explicit VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_, + explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, const SPIRVShader& shader_); @@ -48,7 +48,7 @@ private: vk::Pipeline CreatePipeline() const; - const VKDevice& device; + const Device& device; VKScheduler& scheduler; ShaderEntries entries; diff --git 
a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 4dea03239..f5ea1ff62 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -32,7 +32,7 @@ void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); } -VKDescriptorPool::VKDescriptorPool(const VKDevice& device_, VKScheduler& scheduler) +VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ AllocateNewPool()} {} diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index 2abcaeddd..f892be7be 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -11,7 +11,7 @@ namespace Vulkan { -class VKDevice; +class Device; class VKDescriptorPool; class VKScheduler; @@ -39,7 +39,7 @@ class VKDescriptorPool final { friend DescriptorAllocator; public: - explicit VKDescriptorPool(const VKDevice& device, VKScheduler& scheduler); + explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler); ~VKDescriptorPool(); VKDescriptorPool(const VKDescriptorPool&) = delete; @@ -50,7 +50,7 @@ private: vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); - const VKDevice& device; + const Device& device; MasterSemaphore& master_semaphore; std::vector pools; diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 9008530d5..7a8b3fea0 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -206,8 +206,8 @@ std::unordered_map GetFormatProperties( } // Anonymous namespace -VKDevice::VKDevice(VkInstance 
instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, - const vk::InstanceDispatch& dld_) +Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, + const vk::InstanceDispatch& dld_) : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, format_properties{GetFormatProperties(physical, dld)} { CheckSuitability(); @@ -449,10 +449,10 @@ VKDevice::VKDevice(VkInstance instance_, vk::PhysicalDevice physical_, VkSurface use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); } -VKDevice::~VKDevice() = default; +Device::~Device() = default; -VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, - FormatType format_type) const { +VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, + FormatType format_type) const { if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { return wanted_format; } @@ -483,18 +483,18 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla return wanted_format; } -void VKDevice::ReportLoss() const { - LOG_CRITICAL(Render_Vulkan, "Device loss occurred!"); +void Device::ReportLoss() const { + LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); // Wait for the log to flush and for Nsight Aftermath to dump the results std::this_thread::sleep_for(std::chrono::seconds{15}); } -void VKDevice::SaveShader(const std::vector& spirv) const { +void Device::SaveShader(const std::vector& spirv) const { nsight_aftermath_tracker.SaveShader(spirv); } -bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { +bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { // Disable for now to avoid converting ASTC twice. 
static constexpr std::array astc_formats = { VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, @@ -528,7 +528,7 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) return true; } -bool VKDevice::TestDepthStencilBlits() const { +bool Device::TestDepthStencilBlits() const { static constexpr VkFormatFeatureFlags required_features = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; const auto test_features = [](VkFormatProperties props) { @@ -538,8 +538,8 @@ bool VKDevice::TestDepthStencilBlits() const { test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT)); } -bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, - FormatType format_type) const { +bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, + FormatType format_type) const { const auto it = format_properties.find(wanted_format); if (it == format_properties.end()) { UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format); @@ -549,7 +549,7 @@ bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wa return (supported_usage & wanted_usage) == wanted_usage; } -void VKDevice::CheckSuitability() const { +void Device::CheckSuitability() const { std::bitset available_extensions; for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) { for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { @@ -614,7 +614,7 @@ void VKDevice::CheckSuitability() const { } } -std::vector VKDevice::LoadExtensions() { +std::vector Device::LoadExtensions() { std::vector extensions; extensions.reserve(7 + REQUIRED_EXTENSIONS.size()); extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); @@ -767,7 +767,7 @@ std::vector VKDevice::LoadExtensions() { return extensions; } -void VKDevice::SetupFamilies(VkSurfaceKHR surface) { +void Device::SetupFamilies(VkSurfaceKHR surface) { const 
std::vector queue_family_properties = physical.GetQueueFamilyProperties(); std::optional graphics; std::optional present; @@ -798,14 +798,14 @@ void VKDevice::SetupFamilies(VkSurfaceKHR surface) { present_family = *present; } -void VKDevice::SetupFeatures() { +void Device::SetupFeatures() { const auto supported_features{physical.GetFeatures()}; is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; is_blit_depth_stencil_supported = TestDepthStencilBlits(); is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); } -void VKDevice::CollectTelemetryParameters() { +void Device::CollectTelemetryParameters() { VkPhysicalDeviceDriverPropertiesKHR driver{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR, .pNext = nullptr, @@ -832,7 +832,7 @@ void VKDevice::CollectTelemetryParameters() { } } -void VKDevice::CollectToolingInfo() { +void Device::CollectToolingInfo() { if (!ext_tooling_info) { return; } @@ -858,7 +858,7 @@ void VKDevice::CollectToolingInfo() { } } -std::vector VKDevice::GetDeviceQueueCreateInfos() const { +std::vector Device::GetDeviceQueueCreateInfos() const { static constexpr float QUEUE_PRIORITY = 1.0f; std::unordered_set unique_queue_families{graphics_family, present_family}; diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 146acbe24..b2651e049 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -22,11 +22,11 @@ enum class FormatType { Linear, Optimal, Buffer }; const u32 GuestWarpSize = 32; /// Handles data specific to a physical device. 
-class VKDevice final { +class Device final { public: - explicit VKDevice(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, - const vk::InstanceDispatch& dld); - ~VKDevice(); + explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, + const vk::InstanceDispatch& dld); + ~Device(); /** * Returns a format supported by the device for the passed requeriments. diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index cd044c187..3ec1769ed 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -14,11 +14,11 @@ namespace Vulkan { -InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_, +InnerFence::InnerFence(const Device& device_, VKScheduler& scheduler_, u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {} -InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_, +InnerFence::InnerFence(const Device& device_, VKScheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_) : FenceBase{address_, payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {} @@ -75,7 +75,7 @@ bool InnerFence::IsEventSignalled() const { VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, - const VKDevice& device_, VKScheduler& scheduler_) + const Device& device_, VKScheduler& scheduler_) : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, device{device_}, scheduler{scheduler_} {} diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 272ae6d29..6b51e4587 100644 --- 
a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -21,16 +21,16 @@ class RasterizerInterface; namespace Vulkan { +class Device; class VKBufferCache; -class VKDevice; class VKQueryCache; class VKScheduler; class InnerFence : public VideoCommon::FenceBase { public: - explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_, + explicit InnerFence(const Device& device_, VKScheduler& scheduler_, u32 payload_, bool is_stubbed_); - explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_, + explicit InnerFence(const Device& device_, VKScheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_); ~InnerFence(); @@ -43,7 +43,7 @@ public: private: bool IsEventSignalled() const; - const VKDevice& device; + const Device& device; VKScheduler& scheduler; vk::Event event; u64 ticks = 0; @@ -58,7 +58,7 @@ public: explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, - const VKDevice& device_, VKScheduler& scheduler_); + const Device& device_, VKScheduler& scheduler_); protected: Fence CreateFence(u32 value, bool is_stubbed) override; @@ -68,7 +68,7 @@ protected: void WaitFence(Fence& fence) override; private: - const VKDevice& device; + const Device& device; VKScheduler& scheduler; }; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d9c1ed553..360ab86eb 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -94,7 +94,7 @@ VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { } // Anonymous namespace -VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, 
+VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, const GraphicsPipelineCacheKey& key, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 3bc93bc2a..8b6a98fe0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -40,8 +40,8 @@ static_assert(std::has_unique_object_representations_v static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_constructible_v); +class Device; class VKDescriptorPool; -class VKDevice; class VKScheduler; class VKUpdateDescriptorQueue; @@ -49,7 +49,7 @@ using SPIRVProgram = std::array, Maxwell::MaxShaderSt class VKGraphicsPipeline final { public: - explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, + explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, const GraphicsPipelineCacheKey& key, @@ -85,7 +85,7 @@ private: vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, u32 num_color_buffers) const; - const VKDevice& device; + const Device& device; VKScheduler& scheduler; const GraphicsPipelineCacheKey cache_key; const u64 hash; diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index ed6ea0805..16e764cb8 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -14,7 +14,7 @@ namespace Vulkan { using namespace std::chrono_literals; -MasterSemaphore::MasterSemaphore(const VKDevice& device) { +MasterSemaphore::MasterSemaphore(const Device& device) { static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{ .sType = 
VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 747d2f3bc..f336f1862 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -12,11 +12,11 @@ namespace Vulkan { -class VKDevice; +class Device; class MasterSemaphore { public: - explicit MasterSemaphore(const VKDevice& device); + explicit MasterSemaphore(const Device& device); ~MasterSemaphore(); /// Returns the current logical tick. diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 35f859f77..875bc65db 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -29,7 +29,7 @@ u64 GetAllocationChunkSize(u64 required_size) { class VKMemoryAllocation final { public: - explicit VKMemoryAllocation(const VKDevice& device_, vk::DeviceMemory memory_, + explicit VKMemoryAllocation(const Device& device_, vk::DeviceMemory memory_, VkMemoryPropertyFlags properties_, u64 allocation_size_, u32 type_) : device{device_}, memory{std::move(memory_)}, properties{properties_}, allocation_size{allocation_size_}, shifted_type{ShiftType(type_)} {} @@ -104,7 +104,7 @@ private: return std::nullopt; } - const VKDevice& device; ///< Vulkan device. + const Device& device; ///< Vulkan device. const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. const VkMemoryPropertyFlags properties; ///< Vulkan properties. const u64 allocation_size; ///< Size of this allocation. 
@@ -117,7 +117,7 @@ private: std::vector commits; }; -VKMemoryManager::VKMemoryManager(const VKDevice& device_) +VKMemoryManager::VKMemoryManager(const Device& device_) : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {} VKMemoryManager::~VKMemoryManager() = default; @@ -207,7 +207,7 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requi return {}; } -VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_, +VKMemoryCommitImpl::VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_, const vk::DeviceMemory& memory_, u64 begin_, u64 end_) : device{device_}, memory{memory_}, interval{begin_, end_}, allocation{allocation_} {} diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 20463ecad..2452bca4e 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -13,8 +13,8 @@ namespace Vulkan { +class Device; class MemoryMap; -class VKDevice; class VKMemoryAllocation; class VKMemoryCommitImpl; @@ -22,7 +22,7 @@ using VKMemoryCommit = std::unique_ptr; class VKMemoryManager final { public: - explicit VKMemoryManager(const VKDevice& device_); + explicit VKMemoryManager(const Device& device_); VKMemoryManager(const VKMemoryManager&) = delete; ~VKMemoryManager(); @@ -49,7 +49,7 @@ private: VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements, VkMemoryPropertyFlags wanted_properties); - const VKDevice& device; ///< Device handler. + const Device& device; ///< Device handler. const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. std::vector> allocations; ///< Current allocations. 
}; @@ -59,7 +59,7 @@ class VKMemoryCommitImpl final { friend MemoryMap; public: - explicit VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_, + explicit VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_, const vk::DeviceMemory& memory_, u64 begin_, u64 end_); ~VKMemoryCommitImpl(); @@ -85,7 +85,7 @@ private: /// Unmaps memory. void Unmap() const; - const VKDevice& device; ///< Vulkan device. + const Device& device; ///< Vulkan device. const vk::DeviceMemory& memory; ///< Vulkan device memory handler. std::pair interval{}; ///< Interval where the commit exists. VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b44fd6159..e3e0ecf15 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -149,7 +149,7 @@ Shader::~Shader() = default; VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 5ce1b17f3..89d635a3d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -33,10 +33,10 @@ class System; namespace Vulkan { +class Device; class RasterizerVulkan; class VKComputePipeline; class VKDescriptorPool; -class VKDevice; class VKScheduler; class VKUpdateDescriptorQueue; @@ -121,7 +121,7 @@ public: 
explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, const VKDevice& device, + Tegra::MemoryManager& gpu_memory, const Device& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue); ~VKPipelineCache() override; @@ -148,7 +148,7 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; Tegra::MemoryManager& gpu_memory; - const VKDevice& device; + const Device& device; VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 7852178b6..1288d58ec 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -27,7 +27,7 @@ constexpr VkQueryType GetTarget(QueryType type) { } // Anonymous namespace -QueryPool::QueryPool(const VKDevice& device_, VKScheduler& scheduler, QueryType type_) +QueryPool::QueryPool(const Device& device_, VKScheduler& scheduler, QueryType type_) : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {} QueryPool::~QueryPool() = default; @@ -68,7 +68,7 @@ void QueryPool::Reserve(std::pair query) { VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, - const VKDevice& device_, VKScheduler& scheduler_) + const Device& device_, VKScheduler& scheduler_) : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_}, query_pools{ QueryPool{device_, scheduler_, QueryType::SamplesPassed}, @@ -96,9 +96,9 @@ void VKQueryCache::Reserve(QueryType type, std::pair query) { HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr dependency_, QueryType type_) : 
HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, - query{cache_.AllocateQuery(type_)}, tick{cache_.Scheduler().CurrentTick()} { - const vk::Device* logical = &cache_.Device().GetLogical(); - cache_.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { + query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} { + const vk::Device* logical = &cache.GetDevice().GetLogical(); + cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { logical->ResetQueryPoolEXT(query.first, query.second, 1); cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT); }); @@ -109,17 +109,17 @@ HostCounter::~HostCounter() { } void HostCounter::EndQuery() { - cache.Scheduler().Record( + cache.GetScheduler().Record( [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); }); } u64 HostCounter::BlockingQuery() const { - if (tick >= cache.Scheduler().CurrentTick()) { - cache.Scheduler().Flush(); + if (tick >= cache.GetScheduler().CurrentTick()) { + cache.GetScheduler().Flush(); } u64 data; - const VkResult query_result = cache.Device().GetLogical().GetQueryResults( + const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); @@ -127,7 +127,7 @@ u64 HostCounter::BlockingQuery() const { case VK_SUCCESS: return data; case VK_ERROR_DEVICE_LOST: - cache.Device().ReportLoss(); + cache.GetDevice().ReportLoss(); [[fallthrough]]; default: throw vk::Exception(query_result); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index b4fb6b3b0..7190946b9 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -21,8 +21,8 @@ class RasterizerInterface; namespace Vulkan { class CachedQuery; +class Device; class HostCounter; 
-class VKDevice; class VKQueryCache; class VKScheduler; @@ -30,7 +30,7 @@ using CounterStream = VideoCommon::CounterStreamBase; class QueryPool final : public ResourcePool { public: - explicit QueryPool(const VKDevice& device, VKScheduler& scheduler, VideoCore::QueryType type); + explicit QueryPool(const Device& device, VKScheduler& scheduler, VideoCore::QueryType type); ~QueryPool() override; std::pair Commit(); @@ -43,7 +43,7 @@ protected: private: static constexpr std::size_t GROW_STEP = 512; - const VKDevice& device; + const Device& device; const VideoCore::QueryType type; std::vector pools; @@ -55,23 +55,23 @@ class VKQueryCache final public: explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, - const VKDevice& device_, VKScheduler& scheduler_); + const Device& device_, VKScheduler& scheduler_); ~VKQueryCache(); std::pair AllocateQuery(VideoCore::QueryType type); void Reserve(VideoCore::QueryType type, std::pair query); - const VKDevice& Device() const noexcept { + const Device& GetDevice() const noexcept { return device; } - VKScheduler& Scheduler() const noexcept { + VKScheduler& GetScheduler() const noexcept { return scheduler; } private: - const VKDevice& device; + const Device& device; VKScheduler& scheduler; std::array query_pools; }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 1c174e7ec..85bd9c1c0 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -62,7 +62,7 @@ namespace { constexpr auto COMPUTE_SHADER_INDEX = static_cast(Tegra::Engines::ShaderType::Compute); -VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, size_t index) { +VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; 
const float height = src.scale_y * 2.0f; @@ -239,7 +239,7 @@ public: index.type = type; } - void Bind(const VKDevice& device, VKScheduler& scheduler) const { + void Bind(const Device& device, VKScheduler& scheduler) const { // Use this large switch case to avoid dispatching more memory in the record lambda than // what we need. It looks horrible, but it's the best we can do on standard C++. switch (vertex.num_buffers) { @@ -330,7 +330,7 @@ private: } index; template - void BindStatic(const VKDevice& device, VKScheduler& scheduler) const { + void BindStatic(const Device& device, VKScheduler& scheduler) const { if (device.IsExtExtendedDynamicStateSupported()) { if (index.buffer) { BindStatic(scheduler); @@ -409,7 +409,7 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_, - const VKDevice& device_, VKMemoryManager& memory_manager_, + const Device& device_, VKMemoryManager& memory_manager_, StateTracker& state_tracker_, VKScheduler& scheduler_) : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 7b9ec3bb8..4695718e9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -55,7 +55,7 @@ class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { public: explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - VKScreenInfo& screen_info_, const VKDevice& device_, + VKScreenInfo& screen_info_, const Device& device_, VKMemoryManager& memory_manager_, StateTracker& state_tracker_, 
VKScheduler& scheduler_); ~RasterizerVulkan() override; @@ -212,7 +212,7 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; VKScreenInfo& screen_info; - const VKDevice& device; + const Device& device; VKMemoryManager& memory_manager; StateTracker& state_tracker; VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f7b79e74c..86ac1100f 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -37,7 +37,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { last = nullptr; } -VKScheduler::VKScheduler(const VKDevice& device_, StateTracker& state_tracker_) +VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) : device{device_}, state_tracker{state_tracker_}, master_semaphore{std::make_unique(device)}, command_pool{std::make_unique(*master_semaphore, device)} { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 1172ec622..4cd43e425 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -17,17 +17,17 @@ namespace Vulkan { class CommandPool; +class Device; class Framebuffer; class MasterSemaphore; class StateTracker; -class VKDevice; class VKQueryCache; /// The scheduler abstracts command buffer and fence management with an interface that's able to do /// OpenGL-like operations on Vulkan command buffers. class VKScheduler { public: - explicit VKScheduler(const VKDevice& device, StateTracker& state_tracker); + explicit VKScheduler(const Device& device, StateTracker& state_tracker); ~VKScheduler(); /// Returns the current command buffer tick. 
@@ -179,7 +179,7 @@ private: void AcquireNewChunk(); - const VKDevice& device; + const Device& device; StateTracker& state_tracker; std::unique_ptr master_semaphore; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 571460c2f..f80cc1955 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -274,7 +274,7 @@ bool IsPrecise(Operation operand) { class SPIRVDecompiler final : public Sirit::Module { public: - explicit SPIRVDecompiler(const VKDevice& device_, const ShaderIR& ir_, ShaderType stage_, + explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_, const Registry& registry_, const Specialization& specialization_) : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()}, registry{registry_}, specialization{specialization_} { @@ -2742,7 +2742,7 @@ private: }; static_assert(operation_decompilers.size() == static_cast(OperationCode::Amount)); - const VKDevice& device; + const Device& device; const ShaderIR& ir; const ShaderType stage; const Tegra::Shader::Header header; @@ -3130,7 +3130,7 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { return entries; } -std::vector Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, +std::vector Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, ShaderType stage, const VideoCommon::Shader::Registry& registry, const Specialization& specialization) { return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index ad91ad5de..26381e444 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -15,10 +15,8 @@ #include 
"video_core/shader/shader_ir.h" namespace Vulkan { -class VKDevice; -} -namespace Vulkan { +class Device; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; @@ -109,7 +107,7 @@ struct SPIRVShader { ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); -std::vector Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, +std::vector Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, Tegra::Engines::ShaderType stage, const VideoCommon::Shader::Registry& registry, const Specialization& specialization); diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index 630306077..a5f554a6d 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -13,7 +13,7 @@ namespace Vulkan { -vk::ShaderModule BuildShader(const VKDevice& device, std::span code) { +vk::ShaderModule BuildShader(const Device& device, std::span code) { return device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index 98ee5e668..9517cbe84 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h @@ -11,8 +11,8 @@ namespace Vulkan { -class VKDevice; +class Device; -vk::ShaderModule BuildShader(const VKDevice& device, std::span code); +vk::ShaderModule BuildShader(const Device& device, std::span code); } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index e5155e886..34038f3d0 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -19,7 +19,7 @@ namespace 
Vulkan { VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr buffer_) : buffer{std::move(buffer_)} {} -VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device_, VKMemoryManager& memory_manager_, +VKStagingBufferPool::VKStagingBufferPool(const Device& device_, VKMemoryManager& memory_manager_, VKScheduler& scheduler_) : device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {} diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 97ed1118a..90dadcbbe 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -14,7 +14,7 @@ namespace Vulkan { -class VKDevice; +class Device; class VKScheduler; struct VKBuffer final { @@ -24,7 +24,7 @@ struct VKBuffer final { class VKStagingBufferPool final { public: - explicit VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager, + explicit VKStagingBufferPool(const Device& device, VKMemoryManager& memory_manager, VKScheduler& scheduler); ~VKStagingBufferPool(); @@ -58,7 +58,7 @@ private: u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2); - const VKDevice& device; + const Device& device; VKMemoryManager& memory_manager; VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index aae50bf25..d72acb467 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -60,7 +60,7 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties, } // Anonymous namespace -VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_) +VKStreamBuffer::VKStreamBuffer(const Device& device_, VKScheduler& scheduler_) : device{device_}, scheduler{scheduler_} { CreateBuffers(); ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); diff --git 
a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index aebd68728..2e9c8cb46 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -13,13 +13,13 @@ namespace Vulkan { -class VKDevice; +class Device; class VKFenceWatch; class VKScheduler; class VKStreamBuffer final { public: - explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler); + explicit VKStreamBuffer(const Device& device, VKScheduler& scheduler); ~VKStreamBuffer(); /** @@ -54,7 +54,7 @@ private: void WaitPendingOperations(u64 requested_upper_bound); - const VKDevice& device; ///< Vulkan device manager. + const Device& device; ///< Vulkan device manager. VKScheduler& scheduler; ///< Command scheduler. vk::Buffer buffer; ///< Mapped buffer. diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 458aa4532..7020e2c66 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -56,7 +56,7 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi } // Anonymous namespace -VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const VKDevice& device_, VKScheduler& scheduler_) +VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_) : surface{surface_}, device{device_}, scheduler{scheduler_} {} VKSwapchain::~VKSwapchain() = default; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 25eb20832..2eadd62b3 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -15,12 +15,12 @@ struct FramebufferLayout; namespace Vulkan { -class VKDevice; +class Device; class VKScheduler; class VKSwapchain { public: - explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device, VKScheduler& 
scheduler); + explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler); ~VKSwapchain(); /// Creates (or recreates) the swapchain with a given size. @@ -73,7 +73,7 @@ private: void Destroy(); const VkSurfaceKHR surface; - const VKDevice& device; + const Device& device; VKScheduler& scheduler; vk::SwapchainKHR swapchain; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index e04dd23ef..cf8983f70 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -93,7 +93,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { } } -[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const VKDevice& device, const ImageInfo& info) { +[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const Device& device, const ImageInfo& info) { const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, info.format); VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; if (info.type == ImageType::e2D && info.resources.layers >= 6 && @@ -146,14 +146,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } -[[nodiscard]] vk::Image MakeImage(const VKDevice& device, const ImageInfo& info) { +[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) { if (info.type == ImageType::Buffer) { return vk::Image{}; } return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); } -[[nodiscard]] vk::Buffer MakeBuffer(const VKDevice& device, const ImageInfo& info) { +[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) { if (info.type != ImageType::Buffer) { return vk::Buffer{}; } @@ -205,7 +205,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { } } -[[nodiscard]] VkAttachmentDescription AttachmentDescription(const VKDevice& device, +[[nodiscard]] VkAttachmentDescription 
AttachmentDescription(const Device& device, const ImageView* image_view) { const auto pixel_format = image_view->format; return VkAttachmentDescription{ diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 576515bcc..e68a3db43 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -19,11 +19,11 @@ using VideoCommon::Offset2D; using VideoCommon::RenderTargets; using VideoCore::Surface::PixelFormat; -class VKDevice; class VKScheduler; class VKStagingBufferPool; class BlitImageHelper; +class Device; class Image; class ImageView; class Framebuffer; @@ -68,7 +68,7 @@ struct ImageBufferMap { }; struct TextureCacheRuntime { - const VKDevice& device; + const Device& device; VKScheduler& scheduler; VKMemoryManager& memory_manager; VKStagingBufferPool& staging_buffer_pool; @@ -177,7 +177,7 @@ public: private: [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); - const VKDevice* device = nullptr; + const Device* device = nullptr; std::array image_views; vk::ImageView depth_view; vk::ImageView stencil_view; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index c0603ac22..19948845e 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -14,7 +14,7 @@ namespace Vulkan { -VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_) +VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) : device{device_}, scheduler{scheduler_} {} VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index d0ae49010..e214f7195 100644 --- 
a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -12,7 +12,7 @@ namespace Vulkan { -class VKDevice; +class Device; class VKScheduler; struct DescriptorUpdateEntry { @@ -31,7 +31,7 @@ struct DescriptorUpdateEntry { class VKUpdateDescriptorQueue final { public: - explicit VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_); + explicit VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_); ~VKUpdateDescriptorQueue(); void TickFrame(); @@ -69,7 +69,7 @@ public: } private: - const VKDevice& device; + const Device& device; VKScheduler& scheduler; const DescriptorUpdateEntry* upload_start = nullptr; diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index 09f93463b..9707136e9 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -134,7 +134,7 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, } void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, - const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, + const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, Vulkan::VKDescriptorPool& descriptor_pool, Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, std::vector bindings, diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 004e214a8..f26bbe5ac 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h @@ -94,7 +94,7 @@ public: CompilerSettings compiler_settings, const Registry& registry, VAddr cpu_addr); - void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device, + void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, Vulkan::VKDescriptorPool& descriptor_pool, Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, @@ -123,7 +123,7 @@ 
private: // For Vulkan Vulkan::VKPipelineCache* pp_cache; - const Vulkan::VKDevice* vk_device; + const Vulkan::Device* vk_device; Vulkan::VKScheduler* scheduler; Vulkan::VKDescriptorPool* descriptor_pool; Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; -- cgit v1.2.3 From 3a49c1a691c7e97b2eea0dffd4c1e05b3296f58c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 4 Jan 2021 01:54:54 -0300 Subject: gl_texture_cache: Create base images with sRGB This breaks accelerated decoders trying to imageStore into images with sRGB. The decoders are currently disabled so this won't cause issues at runtime. --- .../renderer_opengl/gl_texture_cache.cpp | 193 ++++++++++----------- src/video_core/renderer_opengl/gl_texture_cache.h | 6 +- 2 files changed, 100 insertions(+), 99 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 4c690418c..710874311 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -61,100 +61,99 @@ struct FormatTuple { GLenum internal_format; GLenum format = GL_NONE; GLenum type = GL_NONE; - GLenum store_format = internal_format; }; constexpr std::array FORMAT_TABLE = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM - {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM - {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM - {GL_R8, 
GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM - {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM - {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM - {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM - {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM - {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM - {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT - {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT - {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT - {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT - {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT - {GL_RG16I, 
GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM - {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_RGBA8}, // A8B8G8R8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM - {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, GL_RGBA8}, // B8G8R8A8_UNORM - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - 
{GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + 
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, 
GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, 
GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT }}; @@ -651,13 +650,11 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, if (IsConverted(runtime.device, info.format, info.type)) { flags |= ImageFlagBits::Converted; gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; - gl_store_format = GL_RGBA8; gl_format = GL_RGBA; gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; } else { const auto& tuple = GetFormatTuple(info.format); gl_internal_format = tuple.internal_format; - gl_store_format = tuple.store_format; gl_format = tuple.format; gl_type = tuple.type; } @@ -677,23 +674,23 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, } switch (target) { case GL_TEXTURE_1D_ARRAY: - glTextureStorage2D(handle, num_levels, gl_store_format, width, num_layers); + glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); break; case GL_TEXTURE_2D_ARRAY: - glTextureStorage3D(handle, num_levels, gl_store_format, width, height, num_layers); + glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); break; case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { // TODO: Where should 'fixedsamplelocations' come from? 
const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); - glTextureStorage3DMultisample(handle, num_samples, gl_store_format, width >> samples_x, + glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x, height >> samples_y, num_layers, GL_FALSE); break; } case GL_TEXTURE_RECTANGLE: - glTextureStorage2D(handle, num_levels, gl_store_format, width, height); + glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); break; case GL_TEXTURE_3D: - glTextureStorage3D(handle, num_levels, gl_store_format, width, height, depth); + glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); break; case GL_TEXTURE_BUFFER: buffer.Create(); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 04193e31e..15b7c3676 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -96,6 +96,10 @@ public: FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; + bool HasBrokenTextureViewFormats() const noexcept { + return has_broken_texture_view_formats; + } + private: struct StagingBuffers { explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); @@ -120,6 +124,7 @@ private: UtilShaders util_shaders; std::array, 3> format_properties; + bool has_broken_texture_view_formats = false; StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT}; @@ -165,7 +170,6 @@ private: OGLTextureView store_view; OGLBuffer buffer; GLenum gl_internal_format = GL_NONE; - GLenum gl_store_format = GL_NONE; GLenum gl_format = GL_NONE; GLenum gl_type = GL_NONE; }; -- cgit v1.2.3 From 7d904fef2e6ac9ce8a3df71e758a36d39b8f69e5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 4 Jan 2021 01:56:44 -0300 Subject: gl_texture_cache: Avoid format views on Intel 
and AMD Intel and AMD proprietary drivers are incapable of rendering to texture views of different formats than the original texture. Avoid creating these at a cache level. This will consume more memory, emulating them with copies. --- src/video_core/compatible_formats.cpp | 9 +++++---- src/video_core/compatible_formats.h | 2 +- src/video_core/renderer_opengl/gl_device.cpp | 4 ++++ src/video_core/renderer_opengl/gl_device.h | 5 +++++ src/video_core/renderer_opengl/gl_texture_cache.cpp | 2 ++ src/video_core/renderer_vulkan/vk_texture_cache.h | 5 +++++ src/video_core/texture_cache/image_base.cpp | 4 +++- src/video_core/texture_cache/image_view_base.cpp | 2 +- src/video_core/texture_cache/texture_cache.h | 15 ++++++++++----- src/video_core/texture_cache/util.cpp | 13 +++++++------ src/video_core/texture_cache/util.h | 8 +++++--- 11 files changed, 48 insertions(+), 21 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp index 1619d8664..acf2668dc 100644 --- a/src/video_core/compatible_formats.cpp +++ b/src/video_core/compatible_formats.cpp @@ -10,9 +10,7 @@ #include "video_core/surface.h" namespace VideoCore::Surface { - namespace { - using Table = std::array, MaxPixelFormat>; // Compatibility table taken from Table 3.X.2 in: @@ -233,10 +231,13 @@ constexpr Table MakeCopyTable() { EnableRange(copy, COPY_CLASS_64_BITS); return copy; } - } // Anonymous namespace -bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b) { +bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views) { + if (broken_views) { + // If format views are broken, only accept formats that are identical. 
+ return format_a == format_b; + } static constexpr Table TABLE = MakeViewTable(); return IsSupported(TABLE, format_a, format_b); } diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h index b5eb03bea..9a0522988 100644 --- a/src/video_core/compatible_formats.h +++ b/src/video_core/compatible_formats.h @@ -8,7 +8,7 @@ namespace VideoCore::Surface { -bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b); +bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views); bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b24179d59..81b71edfb 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -208,6 +208,7 @@ Device::Device() const bool is_nvidia = vendor == "NVIDIA Corporation"; const bool is_amd = vendor == "ATI Technologies Inc."; + const bool is_intel = vendor == "Intel"; bool disable_fast_buffer_sub_data = false; if (is_nvidia && version == "4.6.0 NVIDIA 443.24") { @@ -231,6 +232,7 @@ Device::Device() has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); + has_broken_texture_view_formats = is_amd || is_intel; has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); @@ -248,6 +250,8 @@ Device::Device() LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); + LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", + has_broken_texture_view_formats); if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) { 
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 13e66846c..3e79d1e37 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -96,6 +96,10 @@ public: return has_precise_bug; } + bool HasBrokenTextureViewFormats() const { + return has_broken_texture_view_formats; + } + bool HasFastBufferSubData() const { return has_fast_buffer_sub_data; } @@ -137,6 +141,7 @@ private: bool has_variable_aoffi{}; bool has_component_indexing_bug{}; bool has_precise_bug{}; + bool has_broken_texture_view_formats{}; bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; bool has_debugging_tool_attached{}; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 710874311..546cb6d00 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -430,6 +430,8 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& format_properties[i].emplace(format, properties); } } + has_broken_texture_view_formats = device.HasBrokenTextureViewFormats(); + null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); null_image_3d.Create(GL_TEXTURE_3D); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 576515bcc..e5599de5e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -104,6 +104,11 @@ struct TextureCacheRuntime { } void InsertUploadMemoryBarrier() {} + + bool HasBrokenTextureViewFormats() const noexcept { + // No known Vulkan driver has broken image views + return false; + } }; class Image : public VideoCommon::ImageBase { diff --git 
a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 448a05fcc..959b3f115 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -120,7 +120,9 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i if (lhs.info.type == ImageType::Linear) { base = SubresourceBase{.level = 0, .layer = 0}; } else { - base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS); + // We are passing relaxed formats as an option, having broken views or not won't matter + static constexpr bool broken_views = false; + base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views); } if (!base) { LOG_ERROR(HW_GPU, "Image alias should have been flipped"); diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index 076a4bcfd..18f72e508 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -24,7 +24,7 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i .height = std::max(image_info.size.height >> range.base.level, 1u), .depth = std::max(image_info.size.depth >> range.base.level, 1u), } { - ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format), + ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false), "Image view format {} is incompatible with image format {}", info.format, image_info.format); const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 968059842..ad86c50b4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -883,6 +883,7 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, if (!cpu_addr) { return ImageId{}; } + const bool broken_views = runtime.HasBrokenTextureViewFormats(); ImageId image_id; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { @@ -892,11 +893,11 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && existing.pitch == info.pitch && IsPitchLinearSameSize(existing, info, strict_size) && - IsViewCompatible(existing.format, info.format)) { + IsViewCompatible(existing.format, info.format, broken_views)) { image_id = existing_image_id; return true; } - } else if (IsSubresource(info, existing_image, gpu_addr, options)) { + } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views)) { image_id = existing_image_id; return true; } @@ -926,6 +927,7 @@ template ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { ImageInfo new_info = info; const size_t size_bytes = CalculateGuestSizeInBytes(new_info); + const bool broken_views = runtime.HasBrokenTextureViewFormats(); std::vector overlap_ids; std::vector left_aliased_ids; std::vector right_aliased_ids; @@ -940,7 +942,9 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA } return; } - const auto solution = ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, true); + static constexpr bool strict_size = true; + const std::optional solution = + ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views); if (solution) { gpu_addr = solution->gpu_addr; cpu_addr = solution->cpu_addr; @@ -950,9 +954,10 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA } static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); - if (IsSubresource(new_info, overlap, gpu_addr, options)) { + if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views)) { left_aliased_ids.push_back(overlap_id); - } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options)) { + } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, + broken_views)) { right_aliased_ids.push_back(overlap_id); } }); diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 9ed1fc007..279932778 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -1069,13 +1069,13 @@ bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool stri std::optional ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, VAddr cpu_addr, const ImageBase& overlap, - bool strict_size) { + bool strict_size, bool broken_views) { ASSERT(new_info.type != ImageType::Linear); ASSERT(overlap.info.type != ImageType::Linear); if (!IsLayerStrideCompatible(new_info, overlap.info)) { return std::nullopt; } - if (!IsViewCompatible(overlap.info.format, new_info.format)) { + if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views)) { return std::nullopt; } if (gpu_addr == overlap.gpu_addr) { @@ -1118,14 +1118,15 @@ bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) { } std::optional FindSubresource(const ImageInfo& candidate, const ImageBase& image, - GPUVAddr candidate_addr, RelaxedOptions options) { + GPUVAddr candidate_addr, RelaxedOptions options, + bool broken_views) { const std::optional base = image.TryFindBase(candidate_addr); if (!base) { return std::nullopt; } const ImageInfo& existing = image.info; if (False(options & RelaxedOptions::Format)) { - if 
(!IsViewCompatible(existing.format, candidate.format)) { + if (!IsViewCompatible(existing.format, candidate.format, broken_views)) { return std::nullopt; } } @@ -1162,8 +1163,8 @@ std::optional FindSubresource(const ImageInfo& candidate, const } bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, - RelaxedOptions options) { - return FindSubresource(candidate, image, candidate_addr, options).has_value(); + RelaxedOptions options, bool broken_views) { + return FindSubresource(candidate, image, candidate_addr, options, broken_views).has_value(); } void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index dbbbd33cd..52a9207d6 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -87,17 +87,19 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima [[nodiscard]] std::optional ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, VAddr cpu_addr, const ImageBase& overlap, - bool strict_size); + bool strict_size, bool broken_views); [[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs); [[nodiscard]] std::optional FindSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, - RelaxedOptions options); + RelaxedOptions options, + bool broken_views); [[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, - GPUVAddr candidate_addr, RelaxedOptions options); + GPUVAddr candidate_addr, RelaxedOptions options, + bool broken_views); void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, const ImageBase* src); -- cgit v1.2.3 From 3753553b6a7b493a03f4e6f0e0f726c334502eb0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Dec 2020 01:19:46 -0300 Subject: renderer_vulkan: Move device abstraction to vulkan_common --- 
src/video_core/CMakeLists.txt | 4 +- src/video_core/renderer_vulkan/blit_image.cpp | 2 +- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 2 +- src/video_core/renderer_vulkan/maxwell_to_vk.h | 2 +- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 2 +- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 2 +- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_command_pool.cpp | 2 +- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 2 +- .../renderer_vulkan/vk_descriptor_pool.cpp | 2 +- src/video_core/renderer_vulkan/vk_device.cpp | 883 --------------------- src/video_core/renderer_vulkan/vk_device.h | 306 ------- .../renderer_vulkan/vk_fence_manager.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- .../renderer_vulkan/vk_master_semaphore.cpp | 2 +- .../renderer_vulkan/vk_memory_manager.cpp | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_query_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/renderer_vulkan/vk_scheduler.cpp | 2 +- .../renderer_vulkan/vk_shader_decompiler.cpp | 2 +- src/video_core/renderer_vulkan/vk_shader_util.cpp | 2 +- .../renderer_vulkan/vk_staging_buffer_pool.cpp | 2 +- .../renderer_vulkan/vk_stream_buffer.cpp | 2 +- src/video_core/renderer_vulkan/vk_swapchain.cpp | 2 +- .../renderer_vulkan/vk_texture_cache.cpp | 4 +- .../renderer_vulkan/vk_update_descriptor.cpp | 2 +- src/video_core/shader/async_shaders.h | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 883 +++++++++++++++++++++ src/video_core/vulkan_common/vulkan_device.h | 306 +++++++ 31 files changed, 1219 insertions(+), 1217 deletions(-) delete mode 100644 src/video_core/renderer_vulkan/vk_device.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_device.h create mode 100644 src/video_core/vulkan_common/vulkan_device.cpp create mode 100644 src/video_core/vulkan_common/vulkan_device.h 
(limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 4bd48f706..3f3181395 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -131,8 +131,6 @@ add_library(video_core STATIC renderer_vulkan/vk_compute_pipeline.h renderer_vulkan/vk_descriptor_pool.cpp renderer_vulkan/vk_descriptor_pool.h - renderer_vulkan/vk_device.cpp - renderer_vulkan/vk_device.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h renderer_vulkan/vk_graphics_pipeline.cpp @@ -257,6 +255,8 @@ add_library(video_core STATIC video_core.h vulkan_common/vulkan_debug_callback.cpp vulkan_common/vulkan_debug_callback.h + vulkan_common/vulkan_device.cpp + vulkan_common/vulkan_device.h vulkan_common/vulkan_instance.cpp vulkan_common/vulkan_instance.h vulkan_common/vulkan_library.cpp diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b412cdb7f..1f6a169ae 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -11,13 +11,13 @@ #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 9c0fcfbce..ca7c2c579 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ 
b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -9,8 +9,8 @@ #include "common/logging/log.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan::MaxwellToVK { diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 8fa63c1c9..537969840 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -6,9 +6,9 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/surface.h" #include "video_core/textures/texture.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan::MaxwellToVK { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 0224fc445..d7437e185 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -22,7 +22,6 @@ #include "video_core/gpu.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -30,6 +29,7 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/vulkan_common/vulkan_debug_callback.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_instance.h" #include 
"video_core/vulkan_common/vulkan_library.h" #include "video_core/vulkan_common/vulkan_surface.h" diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index a0f2825e2..5e184eb42 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -21,7 +21,6 @@ #include "video_core/rasterizer_interface.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -29,6 +28,7 @@ #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/surface.h" #include "video_core/textures/decoders.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index f87779c4b..4d517c547 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -9,9 +9,9 @@ #include "core/core.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp index ca512d667..a99df9323 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp @@ -5,7 +5,7 
@@ #include #include "video_core/renderer_vulkan/vk_command_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index b13ed219a..02a6d54b7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -15,10 +15,10 @@ #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index cd92d5dbe..3a48219b7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -6,11 +6,11 @@ #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp 
b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index f5ea1ff62..ef9fb5910 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -6,9 +6,9 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp deleted file mode 100644 index 7a8b3fea0..000000000 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ /dev/null @@ -1,883 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/assert.h" -#include "core/settings.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -namespace { - -namespace Alternatives { - -constexpr std::array Depth24UnormS8_UINT{ - VK_FORMAT_D32_SFLOAT_S8_UINT, - VK_FORMAT_D16_UNORM_S8_UINT, - VkFormat{}, -}; - -constexpr std::array Depth16UnormS8_UINT{ - VK_FORMAT_D24_UNORM_S8_UINT, - VK_FORMAT_D32_SFLOAT_S8_UINT, - VkFormat{}, -}; - -} // namespace Alternatives - -constexpr std::array REQUIRED_EXTENSIONS{ - VK_KHR_SWAPCHAIN_EXTENSION_NAME, - VK_KHR_MAINTENANCE1_EXTENSION_NAME, - VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, - VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, - VK_KHR_16BIT_STORAGE_EXTENSION_NAME, - VK_KHR_8BIT_STORAGE_EXTENSION_NAME, - VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, - VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, - 
VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, - VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, - VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, - VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, - VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, - VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, -}; - -template -void SetNext(void**& next, T& data) { - *next = &data; - next = &data.pNext; -} - -constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { - switch (format) { - case VK_FORMAT_D24_UNORM_S8_UINT: - return Alternatives::Depth24UnormS8_UINT.data(); - case VK_FORMAT_D16_UNORM_S8_UINT: - return Alternatives::Depth16UnormS8_UINT.data(); - default: - return nullptr; - } -} - -VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType format_type) { - switch (format_type) { - case FormatType::Linear: - return properties.linearTilingFeatures; - case FormatType::Optimal: - return properties.optimalTilingFeatures; - case FormatType::Buffer: - return properties.bufferFeatures; - default: - return {}; - } -} - -[[nodiscard]] bool IsRDNA(std::string_view device_name, VkDriverIdKHR driver_id) { - static constexpr std::array RDNA_DEVICES{ - "5700", - "5600", - "5500", - "5300", - }; - if (driver_id != VK_DRIVER_ID_AMD_PROPRIETARY_KHR) { - return false; - } - return std::any_of(RDNA_DEVICES.begin(), RDNA_DEVICES.end(), [device_name](const char* name) { - return device_name.find(name) != std::string_view::npos; - }); -} - -std::unordered_map GetFormatProperties( - vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { - static constexpr std::array formats{ - VK_FORMAT_A8B8G8R8_UNORM_PACK32, - VK_FORMAT_A8B8G8R8_UINT_PACK32, - VK_FORMAT_A8B8G8R8_SNORM_PACK32, - VK_FORMAT_A8B8G8R8_SINT_PACK32, - VK_FORMAT_A8B8G8R8_SRGB_PACK32, - VK_FORMAT_B5G6R5_UNORM_PACK16, - VK_FORMAT_A2B10G10R10_UNORM_PACK32, - VK_FORMAT_A2B10G10R10_UINT_PACK32, - VK_FORMAT_A1R5G5B5_UNORM_PACK16, - VK_FORMAT_R32G32B32A32_SFLOAT, - VK_FORMAT_R32G32B32A32_SINT, - 
VK_FORMAT_R32G32B32A32_UINT, - VK_FORMAT_R32G32_SFLOAT, - VK_FORMAT_R32G32_SINT, - VK_FORMAT_R32G32_UINT, - VK_FORMAT_R16G16B16A16_SINT, - VK_FORMAT_R16G16B16A16_UINT, - VK_FORMAT_R16G16B16A16_SNORM, - VK_FORMAT_R16G16B16A16_UNORM, - VK_FORMAT_R16G16_UNORM, - VK_FORMAT_R16G16_SNORM, - VK_FORMAT_R16G16_SFLOAT, - VK_FORMAT_R16G16_SINT, - VK_FORMAT_R16_UNORM, - VK_FORMAT_R16_UINT, - VK_FORMAT_R8G8B8A8_SRGB, - VK_FORMAT_R8G8_UNORM, - VK_FORMAT_R8G8_SNORM, - VK_FORMAT_R8G8_SINT, - VK_FORMAT_R8G8_UINT, - VK_FORMAT_R8_UNORM, - VK_FORMAT_R8_SNORM, - VK_FORMAT_R8_SINT, - VK_FORMAT_R8_UINT, - VK_FORMAT_B10G11R11_UFLOAT_PACK32, - VK_FORMAT_R32_SFLOAT, - VK_FORMAT_R32_UINT, - VK_FORMAT_R32_SINT, - VK_FORMAT_R16_SFLOAT, - VK_FORMAT_R16G16B16A16_SFLOAT, - VK_FORMAT_B8G8R8A8_UNORM, - VK_FORMAT_B8G8R8A8_SRGB, - VK_FORMAT_R4G4B4A4_UNORM_PACK16, - VK_FORMAT_D32_SFLOAT, - VK_FORMAT_D16_UNORM, - VK_FORMAT_D16_UNORM_S8_UINT, - VK_FORMAT_D24_UNORM_S8_UINT, - VK_FORMAT_D32_SFLOAT_S8_UINT, - VK_FORMAT_BC1_RGBA_UNORM_BLOCK, - VK_FORMAT_BC2_UNORM_BLOCK, - VK_FORMAT_BC3_UNORM_BLOCK, - VK_FORMAT_BC4_UNORM_BLOCK, - VK_FORMAT_BC4_SNORM_BLOCK, - VK_FORMAT_BC5_UNORM_BLOCK, - VK_FORMAT_BC5_SNORM_BLOCK, - VK_FORMAT_BC7_UNORM_BLOCK, - VK_FORMAT_BC6H_UFLOAT_BLOCK, - VK_FORMAT_BC6H_SFLOAT_BLOCK, - VK_FORMAT_BC1_RGBA_SRGB_BLOCK, - VK_FORMAT_BC2_SRGB_BLOCK, - VK_FORMAT_BC3_SRGB_BLOCK, - VK_FORMAT_BC7_SRGB_BLOCK, - VK_FORMAT_ASTC_4x4_UNORM_BLOCK, - VK_FORMAT_ASTC_4x4_SRGB_BLOCK, - VK_FORMAT_ASTC_5x4_UNORM_BLOCK, - VK_FORMAT_ASTC_5x4_SRGB_BLOCK, - VK_FORMAT_ASTC_5x5_UNORM_BLOCK, - VK_FORMAT_ASTC_5x5_SRGB_BLOCK, - VK_FORMAT_ASTC_6x5_UNORM_BLOCK, - VK_FORMAT_ASTC_6x5_SRGB_BLOCK, - VK_FORMAT_ASTC_6x6_UNORM_BLOCK, - VK_FORMAT_ASTC_6x6_SRGB_BLOCK, - VK_FORMAT_ASTC_8x5_UNORM_BLOCK, - VK_FORMAT_ASTC_8x5_SRGB_BLOCK, - VK_FORMAT_ASTC_8x6_UNORM_BLOCK, - VK_FORMAT_ASTC_8x6_SRGB_BLOCK, - VK_FORMAT_ASTC_8x8_UNORM_BLOCK, - VK_FORMAT_ASTC_8x8_SRGB_BLOCK, - VK_FORMAT_ASTC_10x5_UNORM_BLOCK, - 
VK_FORMAT_ASTC_10x5_SRGB_BLOCK, - VK_FORMAT_ASTC_10x6_UNORM_BLOCK, - VK_FORMAT_ASTC_10x6_SRGB_BLOCK, - VK_FORMAT_ASTC_10x8_UNORM_BLOCK, - VK_FORMAT_ASTC_10x8_SRGB_BLOCK, - VK_FORMAT_ASTC_10x10_UNORM_BLOCK, - VK_FORMAT_ASTC_10x10_SRGB_BLOCK, - VK_FORMAT_ASTC_12x10_UNORM_BLOCK, - VK_FORMAT_ASTC_12x10_SRGB_BLOCK, - VK_FORMAT_ASTC_12x12_UNORM_BLOCK, - VK_FORMAT_ASTC_12x12_SRGB_BLOCK, - VK_FORMAT_ASTC_8x6_UNORM_BLOCK, - VK_FORMAT_ASTC_8x6_SRGB_BLOCK, - VK_FORMAT_ASTC_6x5_UNORM_BLOCK, - VK_FORMAT_ASTC_6x5_SRGB_BLOCK, - VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, - }; - std::unordered_map format_properties; - for (const auto format : formats) { - format_properties.emplace(format, physical.GetFormatProperties(format)); - } - return format_properties; -} - -} // Anonymous namespace - -Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, - const vk::InstanceDispatch& dld_) - : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, - format_properties{GetFormatProperties(physical, dld)} { - CheckSuitability(); - SetupFamilies(surface); - SetupFeatures(); - - const auto queue_cis = GetDeviceQueueCreateInfos(); - const std::vector extensions = LoadExtensions(); - - VkPhysicalDeviceFeatures2 features2{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, - .pNext = nullptr, - }; - const void* first_next = &features2; - void** next = &features2.pNext; - - features2.features = { - .robustBufferAccess = false, - .fullDrawIndexUint32 = false, - .imageCubeArray = true, - .independentBlend = true, - .geometryShader = true, - .tessellationShader = true, - .sampleRateShading = false, - .dualSrcBlend = false, - .logicOp = false, - .multiDrawIndirect = false, - .drawIndirectFirstInstance = false, - .depthClamp = true, - .depthBiasClamp = true, - .fillModeNonSolid = false, - .depthBounds = false, - .wideLines = false, - .largePoints = true, - .alphaToOne = false, - .multiViewport = true, - .samplerAnisotropy = true, - 
.textureCompressionETC2 = false, - .textureCompressionASTC_LDR = is_optimal_astc_supported, - .textureCompressionBC = false, - .occlusionQueryPrecise = true, - .pipelineStatisticsQuery = false, - .vertexPipelineStoresAndAtomics = true, - .fragmentStoresAndAtomics = true, - .shaderTessellationAndGeometryPointSize = false, - .shaderImageGatherExtended = true, - .shaderStorageImageExtendedFormats = false, - .shaderStorageImageMultisample = true, - .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, - .shaderStorageImageWriteWithoutFormat = true, - .shaderUniformBufferArrayDynamicIndexing = false, - .shaderSampledImageArrayDynamicIndexing = false, - .shaderStorageBufferArrayDynamicIndexing = false, - .shaderStorageImageArrayDynamicIndexing = false, - .shaderClipDistance = false, - .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderInt16 = false, - .shaderResourceResidency = false, - .shaderResourceMinLod = false, - .sparseBinding = false, - .sparseResidencyBuffer = false, - .sparseResidencyImage2D = false, - .sparseResidencyImage3D = false, - .sparseResidency2Samples = false, - .sparseResidency4Samples = false, - .sparseResidency8Samples = false, - .sparseResidency16Samples = false, - .sparseResidencyAliased = false, - .variableMultisampleRate = false, - .inheritedQueries = false, - }; - VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, - .pNext = nullptr, - .timelineSemaphore = true, - }; - SetNext(next, timeline_semaphore); - - VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, - .pNext = nullptr, - .storageBuffer16BitAccess = false, - .uniformAndStorageBuffer16BitAccess = true, - .storagePushConstant16 = false, - .storageInputOutput16 = false, - }; - SetNext(next, bit16_storage); - - VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage{ - 
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR, - .pNext = nullptr, - .storageBuffer8BitAccess = false, - .uniformAndStorageBuffer8BitAccess = true, - .storagePushConstant8 = false, - }; - SetNext(next, bit8_storage); - - VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT, - .hostQueryReset = true, - }; - SetNext(next, host_query_reset); - - VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; - if (is_float16_supported) { - float16_int8 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR, - .pNext = nullptr, - .shaderFloat16 = true, - .shaderInt8 = false, - }; - SetNext(next, float16_int8); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); - } - - if (!nv_viewport_swizzle) { - LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); - } - - VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; - if (khr_uniform_buffer_standard_layout) { - std430_layout = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR, - .pNext = nullptr, - .uniformBufferStandardLayout = true, - }; - SetNext(next, std430_layout); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); - } - - VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; - if (ext_index_type_uint8) { - index_type_uint8 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT, - .pNext = nullptr, - .indexTypeUint8 = true, - }; - SetNext(next, index_type_uint8); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); - } - - VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; - if (ext_transform_feedback) { - transform_feedback = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT, - .pNext = nullptr, - .transformFeedback = true, - .geometryStreams = true, - }; - SetNext(next, 
transform_feedback); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks"); - } - - VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border; - if (ext_custom_border_color) { - custom_border = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT, - .pNext = nullptr, - .customBorderColors = VK_TRUE, - .customBorderColorWithoutFormat = VK_TRUE, - }; - SetNext(next, custom_border); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors"); - } - - VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; - if (ext_extended_dynamic_state) { - dynamic_state = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT, - .pNext = nullptr, - .extendedDynamicState = VK_TRUE, - }; - SetNext(next, dynamic_state); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); - } - - VkPhysicalDeviceRobustness2FeaturesEXT robustness2; - if (ext_robustness2) { - robustness2 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, - .pNext = nullptr, - .robustBufferAccess2 = false, - .robustImageAccess2 = true, - .nullDescriptor = true, - }; - SetNext(next, robustness2); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support robustness2"); - } - - if (!ext_depth_range_unrestricted) { - LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); - } - - VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; - if (nv_device_diagnostics_config) { - nsight_aftermath_tracker.Initialize(); - - diagnostics_nv = { - .sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV, - .pNext = &features2, - .flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV | - VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV | - VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV, - }; - first_next = &diagnostics_nv; - } - logical = vk::Device::Create(physical, queue_cis, extensions, 
first_next, dld); - - CollectTelemetryParameters(); - CollectToolingInfo(); - - if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { - LOG_WARNING( - Render_Vulkan, - "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); - ext_extended_dynamic_state = false; - } - if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) { - // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it - // seems to cause stability issues - LOG_WARNING( - Render_Vulkan, - "Blacklisting AMD proprietary on RDNA devices from VK_EXT_extended_dynamic_state"); - ext_extended_dynamic_state = false; - } - - graphics_queue = logical.GetQueue(graphics_family); - present_queue = logical.GetQueue(present_family); - - use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); -} - -Device::~Device() = default; - -VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, - FormatType format_type) const { - if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { - return wanted_format; - } - // The wanted format is not supported by hardware, search for alternatives - const VkFormat* alternatives = GetFormatAlternatives(wanted_format); - if (alternatives == nullptr) { - UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host " - "hardware does not support it", - wanted_format, wanted_usage, format_type); - return wanted_format; - } - - std::size_t i = 0; - for (VkFormat alternative = *alternatives; alternative; alternative = alternatives[++i]) { - if (!IsFormatSupported(alternative, wanted_usage, format_type)) { - continue; - } - LOG_WARNING(Render_Vulkan, - "Emulating format={} with alternative format={} with usage={} and type={}", - wanted_format, alternative, wanted_usage, format_type); - return alternative; - } - - // No alternatives found, panic - UNREACHABLE_MSG("Format={} with usage={} and 
type={} is not supported by the host hardware and " - "doesn't support any of the alternatives", - wanted_format, wanted_usage, format_type); - return wanted_format; -} - -void Device::ReportLoss() const { - LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); - - // Wait for the log to flush and for Nsight Aftermath to dump the results - std::this_thread::sleep_for(std::chrono::seconds{15}); -} - -void Device::SaveShader(const std::vector& spirv) const { - nsight_aftermath_tracker.SaveShader(spirv); -} - -bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { - // Disable for now to avoid converting ASTC twice. - static constexpr std::array astc_formats = { - VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, - VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK, - VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK, - VK_FORMAT_ASTC_6x5_UNORM_BLOCK, VK_FORMAT_ASTC_6x5_SRGB_BLOCK, - VK_FORMAT_ASTC_6x6_UNORM_BLOCK, VK_FORMAT_ASTC_6x6_SRGB_BLOCK, - VK_FORMAT_ASTC_8x5_UNORM_BLOCK, VK_FORMAT_ASTC_8x5_SRGB_BLOCK, - VK_FORMAT_ASTC_8x6_UNORM_BLOCK, VK_FORMAT_ASTC_8x6_SRGB_BLOCK, - VK_FORMAT_ASTC_8x8_UNORM_BLOCK, VK_FORMAT_ASTC_8x8_SRGB_BLOCK, - VK_FORMAT_ASTC_10x5_UNORM_BLOCK, VK_FORMAT_ASTC_10x5_SRGB_BLOCK, - VK_FORMAT_ASTC_10x6_UNORM_BLOCK, VK_FORMAT_ASTC_10x6_SRGB_BLOCK, - VK_FORMAT_ASTC_10x8_UNORM_BLOCK, VK_FORMAT_ASTC_10x8_SRGB_BLOCK, - VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_ASTC_10x10_SRGB_BLOCK, - VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK, - VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK, - }; - if (!features.textureCompressionASTC_LDR) { - return false; - } - const auto format_feature_usage{ - VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | - VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | - VK_FORMAT_FEATURE_TRANSFER_DST_BIT}; - for (const auto format : astc_formats) { - const auto 
physical_format_properties{physical.GetFormatProperties(format)}; - if ((physical_format_properties.optimalTilingFeatures & format_feature_usage) == 0) { - return false; - } - } - return true; -} - -bool Device::TestDepthStencilBlits() const { - static constexpr VkFormatFeatureFlags required_features = - VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; - const auto test_features = [](VkFormatProperties props) { - return (props.optimalTilingFeatures & required_features) == required_features; - }; - return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) && - test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT)); -} - -bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, - FormatType format_type) const { - const auto it = format_properties.find(wanted_format); - if (it == format_properties.end()) { - UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format); - return true; - } - const auto supported_usage = GetFormatFeatures(it->second, format_type); - return (supported_usage & wanted_usage) == wanted_usage; -} - -void Device::CheckSuitability() const { - std::bitset available_extensions; - for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) { - for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { - if (available_extensions[i]) { - continue; - } - const std::string_view name{property.extensionName}; - available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; - } - } - for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { - if (available_extensions[i]) { - continue; - } - LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); - throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); - } - struct LimitTuple { - u32 minimum; - u32 value; - const char* name; - }; - const VkPhysicalDeviceLimits& limits{properties.limits}; - const std::array limits_report{ - LimitTuple{65536, limits.maxUniformBufferRange, 
"maxUniformBufferRange"}, - LimitTuple{16, limits.maxViewports, "maxViewports"}, - LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"}, - LimitTuple{8, limits.maxClipDistances, "maxClipDistances"}, - }; - for (const auto& tuple : limits_report) { - if (tuple.value < tuple.minimum) { - LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", tuple.name, - tuple.minimum, tuple.value); - throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); - } - } - const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; - const std::array feature_report{ - std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), - std::make_pair(features.imageCubeArray, "imageCubeArray"), - std::make_pair(features.independentBlend, "independentBlend"), - std::make_pair(features.depthClamp, "depthClamp"), - std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), - std::make_pair(features.largePoints, "largePoints"), - std::make_pair(features.multiViewport, "multiViewport"), - std::make_pair(features.depthBiasClamp, "depthBiasClamp"), - std::make_pair(features.geometryShader, "geometryShader"), - std::make_pair(features.tessellationShader, "tessellationShader"), - std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), - std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), - std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), - std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"), - std::make_pair(features.shaderStorageImageWriteWithoutFormat, - "shaderStorageImageWriteWithoutFormat"), - }; - for (const auto& [is_supported, name] : feature_report) { - if (is_supported) { - continue; - } - LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name); - throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); - } -} - -std::vector Device::LoadExtensions() { - std::vector extensions; - extensions.reserve(7 + 
REQUIRED_EXTENSIONS.size()); - extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); - - bool has_khr_shader_float16_int8{}; - bool has_ext_subgroup_size_control{}; - bool has_ext_transform_feedback{}; - bool has_ext_custom_border_color{}; - bool has_ext_extended_dynamic_state{}; - bool has_ext_robustness2{}; - for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { - const auto test = [&](std::optional> status, const char* name, - bool push) { - if (extension.extensionName != std::string_view(name)) { - return; - } - if (push) { - extensions.push_back(name); - } - if (status) { - status->get() = true; - } - }; - test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); - test(khr_uniform_buffer_standard_layout, - VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); - test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); - test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); - test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); - test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true); - test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, - true); - test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); - test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); - test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); - test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); - test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); - test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false); - test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); - if (Settings::values.renderer_debug) { - test(nv_device_diagnostics_config, 
VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, - true); - } - } - - VkPhysicalDeviceFeatures2KHR features; - features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; - - VkPhysicalDeviceProperties2KHR physical_properties; - physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; - - if (has_khr_shader_float16_int8) { - VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features; - float16_int8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; - float16_int8_features.pNext = nullptr; - features.pNext = &float16_int8_features; - - physical.GetFeatures2KHR(features); - is_float16_supported = float16_int8_features.shaderFloat16; - extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); - } - - if (has_ext_subgroup_size_control) { - VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features; - subgroup_features.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT; - subgroup_features.pNext = nullptr; - features.pNext = &subgroup_features; - physical.GetFeatures2KHR(features); - - VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_properties; - subgroup_properties.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT; - subgroup_properties.pNext = nullptr; - physical_properties.pNext = &subgroup_properties; - physical.GetProperties2KHR(physical_properties); - - is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize; - - if (subgroup_features.subgroupSizeControl && - subgroup_properties.minSubgroupSize <= GuestWarpSize && - subgroup_properties.maxSubgroupSize >= GuestWarpSize) { - extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); - guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; - } - } else { - is_warp_potentially_bigger = true; - } - - if (has_ext_transform_feedback) { - VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; - tfb_features.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; - tfb_features.pNext = nullptr; - features.pNext = &tfb_features; - physical.GetFeatures2KHR(features); - - VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties; - tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; - tfb_properties.pNext = nullptr; - physical_properties.pNext = &tfb_properties; - physical.GetProperties2KHR(physical_properties); - - if (tfb_features.transformFeedback && tfb_features.geometryStreams && - tfb_properties.maxTransformFeedbackStreams >= 4 && - tfb_properties.maxTransformFeedbackBuffers && tfb_properties.transformFeedbackQueries && - tfb_properties.transformFeedbackDraw) { - extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); - ext_transform_feedback = true; - } - } - - if (has_ext_custom_border_color) { - VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features; - border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; - border_features.pNext = nullptr; - features.pNext = &border_features; - physical.GetFeatures2KHR(features); - - if (border_features.customBorderColors && border_features.customBorderColorWithoutFormat) { - extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); - ext_custom_border_color = true; - } - } - - if (has_ext_extended_dynamic_state) { - VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; - dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; - dynamic_state.pNext = nullptr; - features.pNext = &dynamic_state; - physical.GetFeatures2KHR(features); - - if (dynamic_state.extendedDynamicState) { - extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); - ext_extended_dynamic_state = true; - } - } - - if (has_ext_robustness2) { - VkPhysicalDeviceRobustness2FeaturesEXT robustness2; - robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - robustness2.pNext 
= nullptr; - features.pNext = &robustness2; - physical.GetFeatures2KHR(features); - if (robustness2.nullDescriptor && robustness2.robustImageAccess2) { - extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); - ext_robustness2 = true; - } - } - - return extensions; -} - -void Device::SetupFamilies(VkSurfaceKHR surface) { - const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); - std::optional graphics; - std::optional present; - for (u32 index = 0; index < static_cast(queue_family_properties.size()); ++index) { - if (graphics && present) { - break; - } - const VkQueueFamilyProperties& queue_family = queue_family_properties[index]; - if (queue_family.queueCount == 0) { - continue; - } - if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) { - graphics = index; - } - if (physical.GetSurfaceSupportKHR(index, surface)) { - present = index; - } - } - if (!graphics) { - LOG_ERROR(Render_Vulkan, "Device lacks a graphics queue"); - throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); - } - if (!present) { - LOG_ERROR(Render_Vulkan, "Device lacks a present queue"); - throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); - } - graphics_family = *graphics; - present_family = *present; -} - -void Device::SetupFeatures() { - const auto supported_features{physical.GetFeatures()}; - is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; - is_blit_depth_stencil_supported = TestDepthStencilBlits(); - is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); -} - -void Device::CollectTelemetryParameters() { - VkPhysicalDeviceDriverPropertiesKHR driver{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR, - .pNext = nullptr, - .driverID = {}, - .driverName = {}, - .driverInfo = {}, - .conformanceVersion = {}, - }; - - VkPhysicalDeviceProperties2KHR device_properties{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, - .pNext = &driver, - .properties = {}, - }; - 
physical.GetProperties2KHR(device_properties); - - driver_id = driver.driverID; - vendor_name = driver.driverName; - - const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); - reported_extensions.reserve(std::size(extensions)); - for (const auto& extension : extensions) { - reported_extensions.emplace_back(extension.extensionName); - } -} - -void Device::CollectToolingInfo() { - if (!ext_tooling_info) { - return; - } - const auto vkGetPhysicalDeviceToolPropertiesEXT = - reinterpret_cast( - dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT")); - if (!vkGetPhysicalDeviceToolPropertiesEXT) { - return; - } - u32 tool_count = 0; - if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) { - return; - } - std::vector tools(tool_count); - if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) { - return; - } - for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) { - const std::string_view name = tool.name; - LOG_INFO(Render_Vulkan, "{}", name); - has_renderdoc = has_renderdoc || name == "RenderDoc"; - has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics"; - } -} - -std::vector Device::GetDeviceQueueCreateInfos() const { - static constexpr float QUEUE_PRIORITY = 1.0f; - - std::unordered_set unique_queue_families{graphics_family, present_family}; - std::vector queue_cis; - queue_cis.reserve(unique_queue_families.size()); - - for (const u32 queue_family : unique_queue_families) { - auto& ci = queue_cis.emplace_back(VkDeviceQueueCreateInfo{ - .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .queueFamilyIndex = queue_family, - .queueCount = 1, - .pQueuePriorities = nullptr, - }); - ci.pQueuePriorities = &QUEUE_PRIORITY; - } - - return queue_cis; -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h deleted file mode 
100644 index b2651e049..000000000 --- a/src/video_core/renderer_vulkan/vk_device.h +++ /dev/null @@ -1,306 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -/// Format usage descriptor. -enum class FormatType { Linear, Optimal, Buffer }; - -/// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup). -const u32 GuestWarpSize = 32; - -/// Handles data specific to a physical device. -class Device final { -public: - explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, - const vk::InstanceDispatch& dld); - ~Device(); - - /** - * Returns a format supported by the device for the passed requeriments. - * @param wanted_format The ideal format to be returned. It may not be the returned format. - * @param wanted_usage The usage that must be fulfilled even if the format is not supported. - * @param format_type Format type usage. - * @returns A format supported by the device. - */ - VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, - FormatType format_type) const; - - /// Reports a device loss. - void ReportLoss() const; - - /// Reports a shader to Nsight Aftermath. - void SaveShader(const std::vector& spirv) const; - - /// Returns the dispatch loader with direct function pointers of the device. - const vk::DeviceDispatch& GetDispatchLoader() const { - return dld; - } - - /// Returns the logical device. - const vk::Device& GetLogical() const { - return logical; - } - - /// Returns the physical device. - vk::PhysicalDevice GetPhysical() const { - return physical; - } - - /// Returns the main graphics queue. 
- vk::Queue GetGraphicsQueue() const { - return graphics_queue; - } - - /// Returns the main present queue. - vk::Queue GetPresentQueue() const { - return present_queue; - } - - /// Returns main graphics queue family index. - u32 GetGraphicsFamily() const { - return graphics_family; - } - - /// Returns main present queue family index. - u32 GetPresentFamily() const { - return present_family; - } - - /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. - u32 ApiVersion() const { - return properties.apiVersion; - } - - /// Returns the current driver version provided in Vulkan-formatted version numbers. - u32 GetDriverVersion() const { - return properties.driverVersion; - } - - /// Returns the device name. - std::string_view GetModelName() const { - return properties.deviceName; - } - - /// Returns the driver ID. - VkDriverIdKHR GetDriverID() const { - return driver_id; - } - - /// Returns uniform buffer alignment requeriment. - VkDeviceSize GetUniformBufferAlignment() const { - return properties.limits.minUniformBufferOffsetAlignment; - } - - /// Returns storage alignment requeriment. - VkDeviceSize GetStorageBufferAlignment() const { - return properties.limits.minStorageBufferOffsetAlignment; - } - - /// Returns the maximum range for storage buffers. - VkDeviceSize GetMaxStorageBufferRange() const { - return properties.limits.maxStorageBufferRange; - } - - /// Returns the maximum size for push constants. - VkDeviceSize GetMaxPushConstantsSize() const { - return properties.limits.maxPushConstantsSize; - } - - /// Returns the maximum size for shared memory. - u32 GetMaxComputeSharedMemorySize() const { - return properties.limits.maxComputeSharedMemorySize; - } - - /// Returns true if ASTC is natively supported. 
- bool IsOptimalAstcSupported() const { - return is_optimal_astc_supported; - } - - /// Returns true if the device supports float16 natively - bool IsFloat16Supported() const { - return is_float16_supported; - } - - /// Returns true if the device warp size can potentially be bigger than guest's warp size. - bool IsWarpSizePotentiallyBiggerThanGuest() const { - return is_warp_potentially_bigger; - } - - /// Returns true if the device can be forced to use the guest warp size. - bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const { - return guest_warp_stages & stage; - } - - /// Returns true if formatless image load is supported. - bool IsFormatlessImageLoadSupported() const { - return is_formatless_image_load_supported; - } - - /// Returns true when blitting from and to depth stencil images is supported. - bool IsBlitDepthStencilSupported() const { - return is_blit_depth_stencil_supported; - } - - /// Returns true if the device supports VK_NV_viewport_swizzle. - bool IsNvViewportSwizzleSupported() const { - return nv_viewport_swizzle; - } - - /// Returns true if the device supports VK_EXT_scalar_block_layout. - bool IsKhrUniformBufferStandardLayoutSupported() const { - return khr_uniform_buffer_standard_layout; - } - - /// Returns true if the device supports VK_EXT_index_type_uint8. - bool IsExtIndexTypeUint8Supported() const { - return ext_index_type_uint8; - } - - /// Returns true if the device supports VK_EXT_sampler_filter_minmax. - bool IsExtSamplerFilterMinmaxSupported() const { - return ext_sampler_filter_minmax; - } - - /// Returns true if the device supports VK_EXT_depth_range_unrestricted. - bool IsExtDepthRangeUnrestrictedSupported() const { - return ext_depth_range_unrestricted; - } - - /// Returns true if the device supports VK_EXT_shader_viewport_index_layer. - bool IsExtShaderViewportIndexLayerSupported() const { - return ext_shader_viewport_index_layer; - } - - /// Returns true if the device supports VK_EXT_transform_feedback. 
- bool IsExtTransformFeedbackSupported() const { - return ext_transform_feedback; - } - - /// Returns true if the device supports VK_EXT_custom_border_color. - bool IsExtCustomBorderColorSupported() const { - return ext_custom_border_color; - } - - /// Returns true if the device supports VK_EXT_extended_dynamic_state. - bool IsExtExtendedDynamicStateSupported() const { - return ext_extended_dynamic_state; - } - - /// Returns true if the device supports VK_EXT_shader_stencil_export. - bool IsExtShaderStencilExportSupported() const { - return ext_shader_stencil_export; - } - - /// Returns true when a known debugging tool is attached. - bool HasDebuggingToolAttached() const { - return has_renderdoc || has_nsight_graphics; - } - - /// Returns the vendor name reported from Vulkan. - std::string_view GetVendorName() const { - return vendor_name; - } - - /// Returns the list of available extensions. - const std::vector& GetAvailableExtensions() const { - return reported_extensions; - } - - /// Returns true if the setting for async shader compilation is enabled. - bool UseAsynchronousShaders() const { - return use_asynchronous_shaders; - } - -private: - /// Checks if the physical device is suitable. - void CheckSuitability() const; - - /// Loads extensions into a vector and stores available ones in this object. - std::vector LoadExtensions(); - - /// Sets up queue families. - void SetupFamilies(VkSurfaceKHR surface); - - /// Sets up device features. - void SetupFeatures(); - - /// Collects telemetry information from the device. - void CollectTelemetryParameters(); - - /// Collects information about attached tools. - void CollectToolingInfo(); - - /// Returns a list of queue initialization descriptors. - std::vector GetDeviceQueueCreateInfos() const; - - /// Returns true if ASTC textures are natively supported. - bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; - - /// Returns true if the device natively supports blitting depth stencil images. 
- bool TestDepthStencilBlits() const; - - /// Returns true if a format is supported. - bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, - FormatType format_type) const; - - VkInstance instance; ///< Vulkan instance. - vk::DeviceDispatch dld; ///< Device function pointers. - vk::PhysicalDevice physical; ///< Physical device. - VkPhysicalDeviceProperties properties; ///< Device properties. - vk::Device logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 instance_version{}; ///< Vulkan onstance version. - u32 graphics_family{}; ///< Main graphics queue family index. - u32 present_family{}; ///< Main present queue family index. - VkDriverIdKHR driver_id{}; ///< Driver ID. - VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed - bool is_optimal_astc_supported{}; ///< Support for native ASTC. - bool is_float16_supported{}; ///< Support for float16 arithmetics. - bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. - bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. - bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. - bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. - bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. - bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. - bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. - bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. - bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. - bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. - bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. 
- bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. - bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. - bool ext_robustness2{}; ///< Support for VK_EXT_robustness2. - bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. - bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. - bool has_renderdoc{}; ///< Has RenderDoc attached - bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - - // Asynchronous Graphics Pipeline setting - bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline - - // Telemetry parameters - std::string vendor_name; ///< Device's driver name. - std::vector reported_extensions; ///< Reported Vulkan extensions. - - /// Format properties dictionary. - std::unordered_map format_properties; - - /// Nsight Aftermath GPU crash tracker - NsightAftermathTracker nsight_aftermath_tracker; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 3ec1769ed..4c5bc0aa1 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -6,10 +6,10 @@ #include #include "video_core/renderer_vulkan/vk_buffer_cache.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 360ab86eb..8a94464f6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -12,11 
+12,11 @@ #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index 16e764cb8..56ec5e380 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -6,8 +6,8 @@ #include #include "core/settings.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 875bc65db..a6abd0eee 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -11,8 +11,8 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e3e0ecf15..02282e36f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ 
b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -19,7 +19,6 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -29,6 +28,7 @@ #include "video_core/shader/memory_util.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 1288d58ec..7cadd5147 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -7,10 +7,10 @@ #include #include -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 85bd9c1c0..26a8f577c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -27,7 +27,6 @@ #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include 
"video_core/renderer_vulkan/vk_rasterizer.h" @@ -38,6 +37,7 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" #include "video_core/texture_cache/texture_cache.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 86ac1100f..66004f9c0 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -11,12 +11,12 @@ #include "common/microprofile.h" #include "common/thread.h" #include "video_core/renderer_vulkan/vk_command_pool.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index f80cc1955..89cbe01ad 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -22,11 +22,11 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" #include "video_core/engines/shader_type.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/shader/node.h" #include "video_core/shader/shader_ir.h" #include "video_core/shader/transform_feedback.h" +#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { diff --git 
a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index a5f554a6d..aaad4f292 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -7,8 +7,8 @@ #include "common/assert.h" #include "common/common_types.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 34038f3d0..1e0b8b922 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -9,9 +9,9 @@ #include "common/bit_util.h" #include "common/common_types.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index d72acb467..a09fe084e 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -10,9 +10,9 @@ #include "common/alignment.h" #include "common/assert.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 7020e2c66..725a2a05d 100644 --- 
a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -11,9 +11,9 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/frontend/framebuffer_layout.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index cf8983f70..bd11de012 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -10,10 +10,12 @@ #include "video_core/engines/fermi_2d.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 19948845e..f99273c6a 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -7,9 +7,9 @@ #include "common/assert.h" #include "common/logging/log.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/vulkan_common/vulkan_device.h" #include 
"video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index f26bbe5ac..0dbb1a31f 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h @@ -24,9 +24,9 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/vulkan_common/vulkan_device.h" namespace Core::Frontend { class EmuWindow; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp new file mode 100644 index 000000000..67183eed8 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -0,0 +1,883 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "core/settings.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +namespace { + +namespace Alternatives { + +constexpr std::array Depth24UnormS8_UINT{ + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_D16_UNORM_S8_UINT, + VkFormat{}, +}; + +constexpr std::array Depth16UnormS8_UINT{ + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VkFormat{}, +}; + +} // namespace Alternatives + +constexpr std::array REQUIRED_EXTENSIONS{ + VK_KHR_SWAPCHAIN_EXTENSION_NAME, + VK_KHR_MAINTENANCE1_EXTENSION_NAME, + VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, + VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, + VK_KHR_16BIT_STORAGE_EXTENSION_NAME, + VK_KHR_8BIT_STORAGE_EXTENSION_NAME, + VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, + VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, + VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, + VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, + VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, + VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, + VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, +}; + +template +void SetNext(void**& next, T& data) { + *next = &data; + next = &data.pNext; +} + +constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { + switch (format) { + case VK_FORMAT_D24_UNORM_S8_UINT: + return Alternatives::Depth24UnormS8_UINT.data(); + case VK_FORMAT_D16_UNORM_S8_UINT: + return Alternatives::Depth16UnormS8_UINT.data(); + default: + return nullptr; + } +} + +VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType format_type) { + switch (format_type) { + case FormatType::Linear: + return properties.linearTilingFeatures; + case FormatType::Optimal: + return properties.optimalTilingFeatures; + case FormatType::Buffer: + return properties.bufferFeatures; + 
default: + return {}; + } +} + +[[nodiscard]] bool IsRDNA(std::string_view device_name, VkDriverIdKHR driver_id) { + static constexpr std::array RDNA_DEVICES{ + "5700", + "5600", + "5500", + "5300", + }; + if (driver_id != VK_DRIVER_ID_AMD_PROPRIETARY_KHR) { + return false; + } + return std::any_of(RDNA_DEVICES.begin(), RDNA_DEVICES.end(), [device_name](const char* name) { + return device_name.find(name) != std::string_view::npos; + }); +} + +std::unordered_map GetFormatProperties( + vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { + static constexpr std::array formats{ + VK_FORMAT_A8B8G8R8_UNORM_PACK32, + VK_FORMAT_A8B8G8R8_UINT_PACK32, + VK_FORMAT_A8B8G8R8_SNORM_PACK32, + VK_FORMAT_A8B8G8R8_SINT_PACK32, + VK_FORMAT_A8B8G8R8_SRGB_PACK32, + VK_FORMAT_B5G6R5_UNORM_PACK16, + VK_FORMAT_A2B10G10R10_UNORM_PACK32, + VK_FORMAT_A2B10G10R10_UINT_PACK32, + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_R32G32B32A32_SINT, + VK_FORMAT_R32G32B32A32_UINT, + VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32_SINT, + VK_FORMAT_R32G32_UINT, + VK_FORMAT_R16G16B16A16_SINT, + VK_FORMAT_R16G16B16A16_UINT, + VK_FORMAT_R16G16B16A16_SNORM, + VK_FORMAT_R16G16B16A16_UNORM, + VK_FORMAT_R16G16_UNORM, + VK_FORMAT_R16G16_SNORM, + VK_FORMAT_R16G16_SFLOAT, + VK_FORMAT_R16G16_SINT, + VK_FORMAT_R16_UNORM, + VK_FORMAT_R16_UINT, + VK_FORMAT_R8G8B8A8_SRGB, + VK_FORMAT_R8G8_UNORM, + VK_FORMAT_R8G8_SNORM, + VK_FORMAT_R8G8_SINT, + VK_FORMAT_R8G8_UINT, + VK_FORMAT_R8_UNORM, + VK_FORMAT_R8_SNORM, + VK_FORMAT_R8_SINT, + VK_FORMAT_R8_UINT, + VK_FORMAT_B10G11R11_UFLOAT_PACK32, + VK_FORMAT_R32_SFLOAT, + VK_FORMAT_R32_UINT, + VK_FORMAT_R32_SINT, + VK_FORMAT_R16_SFLOAT, + VK_FORMAT_R16G16B16A16_SFLOAT, + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_B8G8R8A8_SRGB, + VK_FORMAT_R4G4B4A4_UNORM_PACK16, + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_D16_UNORM, + VK_FORMAT_D16_UNORM_S8_UINT, + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, + 
VK_FORMAT_BC2_UNORM_BLOCK, + VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_BC4_SNORM_BLOCK, + VK_FORMAT_BC5_UNORM_BLOCK, + VK_FORMAT_BC5_SNORM_BLOCK, + VK_FORMAT_BC7_UNORM_BLOCK, + VK_FORMAT_BC6H_UFLOAT_BLOCK, + VK_FORMAT_BC6H_SFLOAT_BLOCK, + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, + VK_FORMAT_BC2_SRGB_BLOCK, + VK_FORMAT_BC3_SRGB_BLOCK, + VK_FORMAT_BC7_SRGB_BLOCK, + VK_FORMAT_ASTC_4x4_UNORM_BLOCK, + VK_FORMAT_ASTC_4x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x4_UNORM_BLOCK, + VK_FORMAT_ASTC_5x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x5_UNORM_BLOCK, + VK_FORMAT_ASTC_5x5_SRGB_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, + VK_FORMAT_ASTC_6x5_SRGB_BLOCK, + VK_FORMAT_ASTC_6x6_UNORM_BLOCK, + VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x5_UNORM_BLOCK, + VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK, + VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x8_UNORM_BLOCK, + VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x5_UNORM_BLOCK, + VK_FORMAT_ASTC_10x5_SRGB_BLOCK, + VK_FORMAT_ASTC_10x6_UNORM_BLOCK, + VK_FORMAT_ASTC_10x6_SRGB_BLOCK, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK, + VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x10_UNORM_BLOCK, + VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x10_UNORM_BLOCK, + VK_FORMAT_ASTC_12x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x12_UNORM_BLOCK, + VK_FORMAT_ASTC_12x12_SRGB_BLOCK, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK, + VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, + VK_FORMAT_ASTC_6x5_SRGB_BLOCK, + VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, + }; + std::unordered_map format_properties; + for (const auto format : formats) { + format_properties.emplace(format, physical.GetFormatProperties(format)); + } + return format_properties; +} + +} // Anonymous namespace + +Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, + const vk::InstanceDispatch& dld_) + : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, + format_properties{GetFormatProperties(physical, 
dld)} { + CheckSuitability(); + SetupFamilies(surface); + SetupFeatures(); + + const auto queue_cis = GetDeviceQueueCreateInfos(); + const std::vector extensions = LoadExtensions(); + + VkPhysicalDeviceFeatures2 features2{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + .pNext = nullptr, + }; + const void* first_next = &features2; + void** next = &features2.pNext; + + features2.features = { + .robustBufferAccess = false, + .fullDrawIndexUint32 = false, + .imageCubeArray = true, + .independentBlend = true, + .geometryShader = true, + .tessellationShader = true, + .sampleRateShading = false, + .dualSrcBlend = false, + .logicOp = false, + .multiDrawIndirect = false, + .drawIndirectFirstInstance = false, + .depthClamp = true, + .depthBiasClamp = true, + .fillModeNonSolid = false, + .depthBounds = false, + .wideLines = false, + .largePoints = true, + .alphaToOne = false, + .multiViewport = true, + .samplerAnisotropy = true, + .textureCompressionETC2 = false, + .textureCompressionASTC_LDR = is_optimal_astc_supported, + .textureCompressionBC = false, + .occlusionQueryPrecise = true, + .pipelineStatisticsQuery = false, + .vertexPipelineStoresAndAtomics = true, + .fragmentStoresAndAtomics = true, + .shaderTessellationAndGeometryPointSize = false, + .shaderImageGatherExtended = true, + .shaderStorageImageExtendedFormats = false, + .shaderStorageImageMultisample = true, + .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, + .shaderStorageImageWriteWithoutFormat = true, + .shaderUniformBufferArrayDynamicIndexing = false, + .shaderSampledImageArrayDynamicIndexing = false, + .shaderStorageBufferArrayDynamicIndexing = false, + .shaderStorageImageArrayDynamicIndexing = false, + .shaderClipDistance = false, + .shaderCullDistance = false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderInt16 = false, + .shaderResourceResidency = false, + .shaderResourceMinLod = false, + .sparseBinding = false, + .sparseResidencyBuffer = false, + 
.sparseResidencyImage2D = false, + .sparseResidencyImage3D = false, + .sparseResidency2Samples = false, + .sparseResidency4Samples = false, + .sparseResidency8Samples = false, + .sparseResidency16Samples = false, + .sparseResidencyAliased = false, + .variableMultisampleRate = false, + .inheritedQueries = false, + }; + VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, + .pNext = nullptr, + .timelineSemaphore = true, + }; + SetNext(next, timeline_semaphore); + + VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, + .pNext = nullptr, + .storageBuffer16BitAccess = false, + .uniformAndStorageBuffer16BitAccess = true, + .storagePushConstant16 = false, + .storageInputOutput16 = false, + }; + SetNext(next, bit16_storage); + + VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR, + .pNext = nullptr, + .storageBuffer8BitAccess = false, + .uniformAndStorageBuffer8BitAccess = true, + .storagePushConstant8 = false, + }; + SetNext(next, bit8_storage); + + VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT, + .hostQueryReset = true, + }; + SetNext(next, host_query_reset); + + VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; + if (is_float16_supported) { + float16_int8 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR, + .pNext = nullptr, + .shaderFloat16 = true, + .shaderInt8 = false, + }; + SetNext(next, float16_int8); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); + } + + if (!nv_viewport_swizzle) { + LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); + } + + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; + if (khr_uniform_buffer_standard_layout) { + 
std430_layout = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR, + .pNext = nullptr, + .uniformBufferStandardLayout = true, + }; + SetNext(next, std430_layout); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); + } + + VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; + if (ext_index_type_uint8) { + index_type_uint8 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT, + .pNext = nullptr, + .indexTypeUint8 = true, + }; + SetNext(next, index_type_uint8); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); + } + + VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; + if (ext_transform_feedback) { + transform_feedback = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT, + .pNext = nullptr, + .transformFeedback = true, + .geometryStreams = true, + }; + SetNext(next, transform_feedback); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks"); + } + + VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border; + if (ext_custom_border_color) { + custom_border = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT, + .pNext = nullptr, + .customBorderColors = VK_TRUE, + .customBorderColorWithoutFormat = VK_TRUE, + }; + SetNext(next, custom_border); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors"); + } + + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; + if (ext_extended_dynamic_state) { + dynamic_state = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT, + .pNext = nullptr, + .extendedDynamicState = VK_TRUE, + }; + SetNext(next, dynamic_state); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); + } + + VkPhysicalDeviceRobustness2FeaturesEXT robustness2; + if (ext_robustness2) { + robustness2 = { + .sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, + .pNext = nullptr, + .robustBufferAccess2 = false, + .robustImageAccess2 = true, + .nullDescriptor = true, + }; + SetNext(next, robustness2); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support robustness2"); + } + + if (!ext_depth_range_unrestricted) { + LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); + } + + VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; + if (nv_device_diagnostics_config) { + nsight_aftermath_tracker.Initialize(); + + diagnostics_nv = { + .sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV, + .pNext = &features2, + .flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV | + VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV | + VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV, + }; + first_next = &diagnostics_nv; + } + logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); + + CollectTelemetryParameters(); + CollectToolingInfo(); + + if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { + LOG_WARNING( + Render_Vulkan, + "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); + ext_extended_dynamic_state = false; + } + if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) { + // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it + // seems to cause stability issues + LOG_WARNING( + Render_Vulkan, + "Blacklisting AMD proprietary on RDNA devices from VK_EXT_extended_dynamic_state"); + ext_extended_dynamic_state = false; + } + + graphics_queue = logical.GetQueue(graphics_family); + present_queue = logical.GetQueue(present_family); + + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); +} + +Device::~Device() = default; + +VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, + FormatType 
format_type) const { + if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { + return wanted_format; + } + // The wanted format is not supported by hardware, search for alternatives + const VkFormat* alternatives = GetFormatAlternatives(wanted_format); + if (alternatives == nullptr) { + UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host " + "hardware does not support it", + wanted_format, wanted_usage, format_type); + return wanted_format; + } + + std::size_t i = 0; + for (VkFormat alternative = *alternatives; alternative; alternative = alternatives[++i]) { + if (!IsFormatSupported(alternative, wanted_usage, format_type)) { + continue; + } + LOG_WARNING(Render_Vulkan, + "Emulating format={} with alternative format={} with usage={} and type={}", + wanted_format, alternative, wanted_usage, format_type); + return alternative; + } + + // No alternatives found, panic + UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and " + "doesn't support any of the alternatives", + wanted_format, wanted_usage, format_type); + return wanted_format; +} + +void Device::ReportLoss() const { + LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); + + // Wait for the log to flush and for Nsight Aftermath to dump the results + std::this_thread::sleep_for(std::chrono::seconds{15}); +} + +void Device::SaveShader(const std::vector& spirv) const { + nsight_aftermath_tracker.SaveShader(spirv); +} + +bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { + // Disable for now to avoid converting ASTC twice. 
+ static constexpr std::array astc_formats = { + VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, VK_FORMAT_ASTC_6x5_SRGB_BLOCK, + VK_FORMAT_ASTC_6x6_UNORM_BLOCK, VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x5_UNORM_BLOCK, VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK, VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x8_UNORM_BLOCK, VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x5_UNORM_BLOCK, VK_FORMAT_ASTC_10x5_SRGB_BLOCK, + VK_FORMAT_ASTC_10x6_UNORM_BLOCK, VK_FORMAT_ASTC_10x6_SRGB_BLOCK, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK, VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK, + }; + if (!features.textureCompressionASTC_LDR) { + return false; + } + const auto format_feature_usage{ + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT}; + for (const auto format : astc_formats) { + const auto physical_format_properties{physical.GetFormatProperties(format)}; + if ((physical_format_properties.optimalTilingFeatures & format_feature_usage) == 0) { + return false; + } + } + return true; +} + +bool Device::TestDepthStencilBlits() const { + static constexpr VkFormatFeatureFlags required_features = + VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + const auto test_features = [](VkFormatProperties props) { + return (props.optimalTilingFeatures & required_features) == required_features; + }; + return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) && + test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT)); +} + +bool 
Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, + FormatType format_type) const { + const auto it = format_properties.find(wanted_format); + if (it == format_properties.end()) { + UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format); + return true; + } + const auto supported_usage = GetFormatFeatures(it->second, format_type); + return (supported_usage & wanted_usage) == wanted_usage; +} + +void Device::CheckSuitability() const { + std::bitset available_extensions; + for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) { + for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { + if (available_extensions[i]) { + continue; + } + const std::string_view name{property.extensionName}; + available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; + } + } + for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { + if (available_extensions[i]) { + continue; + } + LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); + throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); + } + struct LimitTuple { + u32 minimum; + u32 value; + const char* name; + }; + const VkPhysicalDeviceLimits& limits{properties.limits}; + const std::array limits_report{ + LimitTuple{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"}, + LimitTuple{16, limits.maxViewports, "maxViewports"}, + LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"}, + LimitTuple{8, limits.maxClipDistances, "maxClipDistances"}, + }; + for (const auto& tuple : limits_report) { + if (tuple.value < tuple.minimum) { + LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", tuple.name, + tuple.minimum, tuple.value); + throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); + } + } + const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + const std::array feature_report{ + std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), + 
std::make_pair(features.imageCubeArray, "imageCubeArray"), + std::make_pair(features.independentBlend, "independentBlend"), + std::make_pair(features.depthClamp, "depthClamp"), + std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), + std::make_pair(features.largePoints, "largePoints"), + std::make_pair(features.multiViewport, "multiViewport"), + std::make_pair(features.depthBiasClamp, "depthBiasClamp"), + std::make_pair(features.geometryShader, "geometryShader"), + std::make_pair(features.tessellationShader, "tessellationShader"), + std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), + std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), + std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), + std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"), + std::make_pair(features.shaderStorageImageWriteWithoutFormat, + "shaderStorageImageWriteWithoutFormat"), + }; + for (const auto& [is_supported, name] : feature_report) { + if (is_supported) { + continue; + } + LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name); + throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); + } +} + +std::vector Device::LoadExtensions() { + std::vector extensions; + extensions.reserve(7 + REQUIRED_EXTENSIONS.size()); + extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); + + bool has_khr_shader_float16_int8{}; + bool has_ext_subgroup_size_control{}; + bool has_ext_transform_feedback{}; + bool has_ext_custom_border_color{}; + bool has_ext_extended_dynamic_state{}; + bool has_ext_robustness2{}; + for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { + const auto test = [&](std::optional> status, const char* name, + bool push) { + if (extension.extensionName != std::string_view(name)) { + return; + } + if (push) { + extensions.push_back(name); + } + if (status) { + status->get() = true; + 
} + }; + test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); + test(khr_uniform_buffer_standard_layout, + VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); + test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); + test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); + test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); + test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true); + test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, + true); + test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); + test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); + test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); + test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); + test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); + test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false); + test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + if (Settings::values.renderer_debug) { + test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, + true); + } + } + + VkPhysicalDeviceFeatures2KHR features; + features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; + + VkPhysicalDeviceProperties2KHR physical_properties; + physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + + if (has_khr_shader_float16_int8) { + VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features; + float16_int8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; + float16_int8_features.pNext = nullptr; + features.pNext = &float16_int8_features; + + physical.GetFeatures2KHR(features); + is_float16_supported = float16_int8_features.shaderFloat16; + 
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); + } + + if (has_ext_subgroup_size_control) { + VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features; + subgroup_features.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT; + subgroup_features.pNext = nullptr; + features.pNext = &subgroup_features; + physical.GetFeatures2KHR(features); + + VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_properties; + subgroup_properties.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT; + subgroup_properties.pNext = nullptr; + physical_properties.pNext = &subgroup_properties; + physical.GetProperties2KHR(physical_properties); + + is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize; + + if (subgroup_features.subgroupSizeControl && + subgroup_properties.minSubgroupSize <= GuestWarpSize && + subgroup_properties.maxSubgroupSize >= GuestWarpSize) { + extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); + guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; + } + } else { + is_warp_potentially_bigger = true; + } + + if (has_ext_transform_feedback) { + VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; + tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; + tfb_features.pNext = nullptr; + features.pNext = &tfb_features; + physical.GetFeatures2KHR(features); + + VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties; + tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; + tfb_properties.pNext = nullptr; + physical_properties.pNext = &tfb_properties; + physical.GetProperties2KHR(physical_properties); + + if (tfb_features.transformFeedback && tfb_features.geometryStreams && + tfb_properties.maxTransformFeedbackStreams >= 4 && + tfb_properties.maxTransformFeedbackBuffers && tfb_properties.transformFeedbackQueries && + 
tfb_properties.transformFeedbackDraw) { + extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); + ext_transform_feedback = true; + } + } + + if (has_ext_custom_border_color) { + VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features; + border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; + border_features.pNext = nullptr; + features.pNext = &border_features; + physical.GetFeatures2KHR(features); + + if (border_features.customBorderColors && border_features.customBorderColorWithoutFormat) { + extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); + ext_custom_border_color = true; + } + } + + if (has_ext_extended_dynamic_state) { + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; + dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + dynamic_state.pNext = nullptr; + features.pNext = &dynamic_state; + physical.GetFeatures2KHR(features); + + if (dynamic_state.extendedDynamicState) { + extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); + ext_extended_dynamic_state = true; + } + } + + if (has_ext_robustness2) { + VkPhysicalDeviceRobustness2FeaturesEXT robustness2; + robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + robustness2.pNext = nullptr; + features.pNext = &robustness2; + physical.GetFeatures2KHR(features); + if (robustness2.nullDescriptor && robustness2.robustImageAccess2) { + extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); + ext_robustness2 = true; + } + } + + return extensions; +} + +void Device::SetupFamilies(VkSurfaceKHR surface) { + const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); + std::optional graphics; + std::optional present; + for (u32 index = 0; index < static_cast(queue_family_properties.size()); ++index) { + if (graphics && present) { + break; + } + const VkQueueFamilyProperties& queue_family = queue_family_properties[index]; + if 
(queue_family.queueCount == 0) { + continue; + } + if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) { + graphics = index; + } + if (physical.GetSurfaceSupportKHR(index, surface)) { + present = index; + } + } + if (!graphics) { + LOG_ERROR(Render_Vulkan, "Device lacks a graphics queue"); + throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); + } + if (!present) { + LOG_ERROR(Render_Vulkan, "Device lacks a present queue"); + throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); + } + graphics_family = *graphics; + present_family = *present; +} + +void Device::SetupFeatures() { + const auto supported_features{physical.GetFeatures()}; + is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; + is_blit_depth_stencil_supported = TestDepthStencilBlits(); + is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); +} + +void Device::CollectTelemetryParameters() { + VkPhysicalDeviceDriverPropertiesKHR driver{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR, + .pNext = nullptr, + .driverID = {}, + .driverName = {}, + .driverInfo = {}, + .conformanceVersion = {}, + }; + + VkPhysicalDeviceProperties2KHR device_properties{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, + .pNext = &driver, + .properties = {}, + }; + physical.GetProperties2KHR(device_properties); + + driver_id = driver.driverID; + vendor_name = driver.driverName; + + const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); + reported_extensions.reserve(std::size(extensions)); + for (const auto& extension : extensions) { + reported_extensions.emplace_back(extension.extensionName); + } +} + +void Device::CollectToolingInfo() { + if (!ext_tooling_info) { + return; + } + const auto vkGetPhysicalDeviceToolPropertiesEXT = + reinterpret_cast( + dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT")); + if (!vkGetPhysicalDeviceToolPropertiesEXT) { + return; + } + u32 tool_count = 0; + if 
(vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) { + return; + } + std::vector tools(tool_count); + if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) { + return; + } + for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) { + const std::string_view name = tool.name; + LOG_INFO(Render_Vulkan, "{}", name); + has_renderdoc = has_renderdoc || name == "RenderDoc"; + has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics"; + } +} + +std::vector Device::GetDeviceQueueCreateInfos() const { + static constexpr float QUEUE_PRIORITY = 1.0f; + + std::unordered_set unique_queue_families{graphics_family, present_family}; + std::vector queue_cis; + queue_cis.reserve(unique_queue_families.size()); + + for (const u32 queue_family : unique_queue_families) { + auto& ci = queue_cis.emplace_back(VkDeviceQueueCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .queueFamilyIndex = queue_family, + .queueCount = 1, + .pQueuePriorities = nullptr, + }); + ci.pQueuePriorities = &QUEUE_PRIORITY; + } + + return queue_cis; +} + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h new file mode 100644 index 000000000..b2651e049 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -0,0 +1,306 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +/// Format usage descriptor. +enum class FormatType { Linear, Optimal, Buffer }; + +/// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup). 
+const u32 GuestWarpSize = 32; + +/// Handles data specific to a physical device. +class Device final { +public: + explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, + const vk::InstanceDispatch& dld); + ~Device(); + + /** + * Returns a format supported by the device for the passed requeriments. + * @param wanted_format The ideal format to be returned. It may not be the returned format. + * @param wanted_usage The usage that must be fulfilled even if the format is not supported. + * @param format_type Format type usage. + * @returns A format supported by the device. + */ + VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, + FormatType format_type) const; + + /// Reports a device loss. + void ReportLoss() const; + + /// Reports a shader to Nsight Aftermath. + void SaveShader(const std::vector& spirv) const; + + /// Returns the dispatch loader with direct function pointers of the device. + const vk::DeviceDispatch& GetDispatchLoader() const { + return dld; + } + + /// Returns the logical device. + const vk::Device& GetLogical() const { + return logical; + } + + /// Returns the physical device. + vk::PhysicalDevice GetPhysical() const { + return physical; + } + + /// Returns the main graphics queue. + vk::Queue GetGraphicsQueue() const { + return graphics_queue; + } + + /// Returns the main present queue. + vk::Queue GetPresentQueue() const { + return present_queue; + } + + /// Returns main graphics queue family index. + u32 GetGraphicsFamily() const { + return graphics_family; + } + + /// Returns main present queue family index. + u32 GetPresentFamily() const { + return present_family; + } + + /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. + u32 ApiVersion() const { + return properties.apiVersion; + } + + /// Returns the current driver version provided in Vulkan-formatted version numbers. 
+ u32 GetDriverVersion() const { + return properties.driverVersion; + } + + /// Returns the device name. + std::string_view GetModelName() const { + return properties.deviceName; + } + + /// Returns the driver ID. + VkDriverIdKHR GetDriverID() const { + return driver_id; + } + + /// Returns uniform buffer alignment requeriment. + VkDeviceSize GetUniformBufferAlignment() const { + return properties.limits.minUniformBufferOffsetAlignment; + } + + /// Returns storage alignment requeriment. + VkDeviceSize GetStorageBufferAlignment() const { + return properties.limits.minStorageBufferOffsetAlignment; + } + + /// Returns the maximum range for storage buffers. + VkDeviceSize GetMaxStorageBufferRange() const { + return properties.limits.maxStorageBufferRange; + } + + /// Returns the maximum size for push constants. + VkDeviceSize GetMaxPushConstantsSize() const { + return properties.limits.maxPushConstantsSize; + } + + /// Returns the maximum size for shared memory. + u32 GetMaxComputeSharedMemorySize() const { + return properties.limits.maxComputeSharedMemorySize; + } + + /// Returns true if ASTC is natively supported. + bool IsOptimalAstcSupported() const { + return is_optimal_astc_supported; + } + + /// Returns true if the device supports float16 natively + bool IsFloat16Supported() const { + return is_float16_supported; + } + + /// Returns true if the device warp size can potentially be bigger than guest's warp size. + bool IsWarpSizePotentiallyBiggerThanGuest() const { + return is_warp_potentially_bigger; + } + + /// Returns true if the device can be forced to use the guest warp size. + bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const { + return guest_warp_stages & stage; + } + + /// Returns true if formatless image load is supported. + bool IsFormatlessImageLoadSupported() const { + return is_formatless_image_load_supported; + } + + /// Returns true when blitting from and to depth stencil images is supported. 
+ bool IsBlitDepthStencilSupported() const { + return is_blit_depth_stencil_supported; + } + + /// Returns true if the device supports VK_NV_viewport_swizzle. + bool IsNvViewportSwizzleSupported() const { + return nv_viewport_swizzle; + } + + /// Returns true if the device supports VK_EXT_scalar_block_layout. + bool IsKhrUniformBufferStandardLayoutSupported() const { + return khr_uniform_buffer_standard_layout; + } + + /// Returns true if the device supports VK_EXT_index_type_uint8. + bool IsExtIndexTypeUint8Supported() const { + return ext_index_type_uint8; + } + + /// Returns true if the device supports VK_EXT_sampler_filter_minmax. + bool IsExtSamplerFilterMinmaxSupported() const { + return ext_sampler_filter_minmax; + } + + /// Returns true if the device supports VK_EXT_depth_range_unrestricted. + bool IsExtDepthRangeUnrestrictedSupported() const { + return ext_depth_range_unrestricted; + } + + /// Returns true if the device supports VK_EXT_shader_viewport_index_layer. + bool IsExtShaderViewportIndexLayerSupported() const { + return ext_shader_viewport_index_layer; + } + + /// Returns true if the device supports VK_EXT_transform_feedback. + bool IsExtTransformFeedbackSupported() const { + return ext_transform_feedback; + } + + /// Returns true if the device supports VK_EXT_custom_border_color. + bool IsExtCustomBorderColorSupported() const { + return ext_custom_border_color; + } + + /// Returns true if the device supports VK_EXT_extended_dynamic_state. + bool IsExtExtendedDynamicStateSupported() const { + return ext_extended_dynamic_state; + } + + /// Returns true if the device supports VK_EXT_shader_stencil_export. + bool IsExtShaderStencilExportSupported() const { + return ext_shader_stencil_export; + } + + /// Returns true when a known debugging tool is attached. + bool HasDebuggingToolAttached() const { + return has_renderdoc || has_nsight_graphics; + } + + /// Returns the vendor name reported from Vulkan. 
+ std::string_view GetVendorName() const { + return vendor_name; + } + + /// Returns the list of available extensions. + const std::vector& GetAvailableExtensions() const { + return reported_extensions; + } + + /// Returns true if the setting for async shader compilation is enabled. + bool UseAsynchronousShaders() const { + return use_asynchronous_shaders; + } + +private: + /// Checks if the physical device is suitable. + void CheckSuitability() const; + + /// Loads extensions into a vector and stores available ones in this object. + std::vector LoadExtensions(); + + /// Sets up queue families. + void SetupFamilies(VkSurfaceKHR surface); + + /// Sets up device features. + void SetupFeatures(); + + /// Collects telemetry information from the device. + void CollectTelemetryParameters(); + + /// Collects information about attached tools. + void CollectToolingInfo(); + + /// Returns a list of queue initialization descriptors. + std::vector GetDeviceQueueCreateInfos() const; + + /// Returns true if ASTC textures are natively supported. + bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; + + /// Returns true if the device natively supports blitting depth stencil images. + bool TestDepthStencilBlits() const; + + /// Returns true if a format is supported. + bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, + FormatType format_type) const; + + VkInstance instance; ///< Vulkan instance. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. + VkPhysicalDeviceProperties properties; ///< Device properties. + vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 instance_version{}; ///< Vulkan onstance version. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. 
+ VkDriverIdKHR driver_id{}; ///< Driver ID. + VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed + bool is_optimal_astc_supported{}; ///< Support for native ASTC. + bool is_float16_supported{}; ///< Support for float16 arithmetics. + bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. + bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. + bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. + bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. + bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. + bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. + bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. + bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. + bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. + bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. + bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. + bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. + bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_robustness2{}; ///< Support for VK_EXT_robustness2. + bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. + bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached + + // Asynchronous Graphics Pipeline setting + bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline + + // Telemetry parameters + std::string vendor_name; ///< Device's driver name. + std::vector reported_extensions; ///< Reported Vulkan extensions. 
+ + /// Format properties dictionary. + std::unordered_map format_properties; + + /// Nsight Aftermath GPU crash tracker + NsightAftermathTracker nsight_aftermath_tracker; +}; + +} // namespace Vulkan -- cgit v1.2.3 From d235cf393399c386a59b5e39d39bc9efb161aea0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Dec 2020 01:26:52 -0300 Subject: renderer_vulkan/nsight_aftermath_tracker: Move to vulkan_common --- src/video_core/CMakeLists.txt | 4 +- .../renderer_vulkan/nsight_aftermath_tracker.cpp | 220 --------------------- .../renderer_vulkan/nsight_aftermath_tracker.h | 87 -------- .../vulkan_common/nsight_aftermath_tracker.cpp | 212 ++++++++++++++++++++ .../vulkan_common/nsight_aftermath_tracker.h | 82 ++++++++ src/video_core/vulkan_common/vulkan_device.cpp | 7 +- src/video_core/vulkan_common/vulkan_device.h | 5 +- 7 files changed, 304 insertions(+), 313 deletions(-) delete mode 100644 src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp delete mode 100644 src/video_core/renderer_vulkan/nsight_aftermath_tracker.h create mode 100644 src/video_core/vulkan_common/nsight_aftermath_tracker.cpp create mode 100644 src/video_core/vulkan_common/nsight_aftermath_tracker.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3f3181395..f7b9d7f86 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -115,8 +115,6 @@ add_library(video_core STATIC renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.h - renderer_vulkan/nsight_aftermath_tracker.cpp - renderer_vulkan/nsight_aftermath_tracker.h renderer_vulkan/renderer_vulkan.h renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/vk_blit_screen.cpp @@ -265,6 +263,8 @@ add_library(video_core STATIC vulkan_common/vulkan_surface.h vulkan_common/vulkan_wrapper.cpp vulkan_common/vulkan_wrapper.h + vulkan_common/nsight_aftermath_tracker.cpp + vulkan_common/nsight_aftermath_tracker.h ) 
create_target_directory_groups(video_core) diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp deleted file mode 100644 index 5b01020ec..000000000 --- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp +++ /dev/null @@ -1,220 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#ifdef HAS_NSIGHT_AFTERMATH - -#include -#include -#include -#include -#include - -#include - -#define VK_NO_PROTOTYPES -#include - -#include -#include -#include -#include - -#include "common/common_paths.h" -#include "common/common_types.h" -#include "common/file_util.h" -#include "common/logging/log.h" -#include "common/scope_exit.h" - -#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h" - -namespace Vulkan { - -static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll"; - -NsightAftermathTracker::NsightAftermathTracker() = default; - -NsightAftermathTracker::~NsightAftermathTracker() { - if (initialized) { - (void)GFSDK_Aftermath_DisableGpuCrashDumps(); - } -} - -bool NsightAftermathTracker::Initialize() { - if (!dl.Open(AFTERMATH_LIB_NAME)) { - LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL"); - return false; - } - - if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps", - &GFSDK_Aftermath_DisableGpuCrashDumps) || - !dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps", - &GFSDK_Aftermath_EnableGpuCrashDumps) || - !dl.GetSymbol("GFSDK_Aftermath_GetShaderDebugInfoIdentifier", - &GFSDK_Aftermath_GetShaderDebugInfoIdentifier) || - !dl.GetSymbol("GFSDK_Aftermath_GetShaderHashSpirv", &GFSDK_Aftermath_GetShaderHashSpirv) || - !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_CreateDecoder", - &GFSDK_Aftermath_GpuCrashDump_CreateDecoder) || - !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_DestroyDecoder", - &GFSDK_Aftermath_GpuCrashDump_DestroyDecoder) || - 
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GenerateJSON", - &GFSDK_Aftermath_GpuCrashDump_GenerateJSON) || - !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GetJSON", - &GFSDK_Aftermath_GpuCrashDump_GetJSON)) { - LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers"); - return false; - } - - dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash"; - - (void)Common::FS::DeleteDirRecursively(dump_dir); - if (!Common::FS::CreateDir(dump_dir)) { - LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory"); - return false; - } - - if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps( - GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan, - GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback, - ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) { - LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed"); - return false; - } - - LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir); - - initialized = true; - return true; -} - -void NsightAftermathTracker::SaveShader(const std::vector& spirv) const { - if (!initialized) { - return; - } - - std::vector spirv_copy = spirv; - GFSDK_Aftermath_SpirvCode shader; - shader.pData = spirv_copy.data(); - shader.size = static_cast(spirv_copy.size() * 4); - - std::scoped_lock lock{mutex}; - - GFSDK_Aftermath_ShaderHash hash; - if (!GFSDK_Aftermath_SUCCEED( - GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &hash))) { - LOG_ERROR(Render_Vulkan, "Failed to hash SPIR-V module"); - return; - } - - Common::FS::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb"); - if (!file.IsOpen()) { - LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); - return; - } - if (file.WriteArray(spirv.data(), spirv.size()) != spirv.size()) { - LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with 
hash={:016x}", hash.hash); - return; - } -} - -void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump, - u32 gpu_crash_dump_size) { - std::scoped_lock lock{mutex}; - - LOG_CRITICAL(Render_Vulkan, "called"); - - GFSDK_Aftermath_GpuCrashDump_Decoder decoder; - if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_CreateDecoder( - GFSDK_Aftermath_Version_API, gpu_crash_dump, gpu_crash_dump_size, &decoder))) { - LOG_ERROR(Render_Vulkan, "Failed to create decoder"); - return; - } - SCOPE_EXIT({ GFSDK_Aftermath_GpuCrashDump_DestroyDecoder(decoder); }); - - u32 json_size = 0; - if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_GenerateJSON( - decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO, - GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, nullptr, - this, &json_size))) { - LOG_ERROR(Render_Vulkan, "Failed to generate JSON"); - return; - } - std::vector json(json_size); - if (!GFSDK_Aftermath_SUCCEED( - GFSDK_Aftermath_GpuCrashDump_GetJSON(decoder, json_size, json.data()))) { - LOG_ERROR(Render_Vulkan, "Failed to query JSON"); - return; - } - - const std::string base_name = [this] { - const int id = dump_id++; - if (id == 0) { - return fmt::format("{}/crash.nv-gpudmp", dump_dir); - } else { - return fmt::format("{}/crash_{}.nv-gpudmp", dump_dir, id); - } - }(); - - std::string_view dump_view(static_cast(gpu_crash_dump), gpu_crash_dump_size); - if (Common::FS::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) { - LOG_ERROR(Render_Vulkan, "Failed to write dump file"); - return; - } - const std::string_view json_view(json.data(), json.size()); - if (Common::FS::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) { - LOG_ERROR(Render_Vulkan, "Failed to write JSON"); - return; - } -} - -void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_info, - u32 shader_debug_info_size) { - std::scoped_lock lock{mutex}; - - 
GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier; - if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GetShaderDebugInfoIdentifier( - GFSDK_Aftermath_Version_API, shader_debug_info, shader_debug_info_size, &identifier))) { - LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_GetShaderDebugInfoIdentifier failed"); - return; - } - - const std::string path = - fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]); - Common::FS::IOFile file(path, "wb"); - if (!file.IsOpen()) { - LOG_ERROR(Render_Vulkan, "Failed to create file {}", path); - return; - } - if (file.WriteBytes(static_cast(shader_debug_info), shader_debug_info_size) != - shader_debug_info_size) { - LOG_ERROR(Render_Vulkan, "Failed to write file {}", path); - return; - } -} - -void NsightAftermathTracker::OnCrashDumpDescriptionCallback( - PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description) { - add_description(GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName, "yuzu"); -} - -void NsightAftermathTracker::GpuCrashDumpCallback(const void* gpu_crash_dump, - u32 gpu_crash_dump_size, void* user_data) { - static_cast(user_data)->OnGpuCrashDumpCallback(gpu_crash_dump, - gpu_crash_dump_size); -} - -void NsightAftermathTracker::ShaderDebugInfoCallback(const void* shader_debug_info, - u32 shader_debug_info_size, void* user_data) { - static_cast(user_data)->OnShaderDebugInfoCallback( - shader_debug_info, shader_debug_info_size); -} - -void NsightAftermathTracker::CrashDumpDescriptionCallback( - PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data) { - static_cast(user_data)->OnCrashDumpDescriptionCallback( - add_description); -} - -} // namespace Vulkan - -#endif // HAS_NSIGHT_AFTERMATH diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h deleted file mode 100644 index afe7ae99e..000000000 --- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h +++ /dev/null @@ 
-1,87 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#define VK_NO_PROTOTYPES -#include - -#ifdef HAS_NSIGHT_AFTERMATH -#include -#include -#include -#endif - -#include "common/common_types.h" -#include "common/dynamic_library.h" - -namespace Vulkan { - -class NsightAftermathTracker { -public: - NsightAftermathTracker(); - ~NsightAftermathTracker(); - - NsightAftermathTracker(const NsightAftermathTracker&) = delete; - NsightAftermathTracker& operator=(const NsightAftermathTracker&) = delete; - - // Delete move semantics because Aftermath initialization uses a pointer to this. - NsightAftermathTracker(NsightAftermathTracker&&) = delete; - NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; - - bool Initialize(); - - void SaveShader(const std::vector& spirv) const; - -private: -#ifdef HAS_NSIGHT_AFTERMATH - static void GpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size, - void* user_data); - - static void ShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size, - void* user_data); - - static void CrashDumpDescriptionCallback( - PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data); - - void OnGpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size); - - void OnShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size); - - void OnCrashDumpDescriptionCallback( - PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description); - - mutable std::mutex mutex; - - std::string dump_dir; - int dump_id = 0; - - bool initialized = false; - - Common::DynamicLibrary dl; - PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps; - PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps; - PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier 
GFSDK_Aftermath_GetShaderDebugInfoIdentifier; - PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv; - PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder; - PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder; - PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON; - PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON; -#endif -}; - -#ifndef HAS_NSIGHT_AFTERMATH -inline NsightAftermathTracker::NsightAftermathTracker() = default; -inline NsightAftermathTracker::~NsightAftermathTracker() = default; -inline bool NsightAftermathTracker::Initialize() { - return false; -} -inline void NsightAftermathTracker::SaveShader(const std::vector&) const {} -#endif - -} // namespace Vulkan diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp new file mode 100644 index 000000000..8d10ac29e --- /dev/null +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ -0,0 +1,212 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#ifdef HAS_NSIGHT_AFTERMATH + +#include +#include +#include +#include +#include + +#include + +#define VK_NO_PROTOTYPES +#include + +#include +#include +#include +#include + +#include "common/common_paths.h" +#include "common/common_types.h" +#include "common/file_util.h" +#include "common/logging/log.h" +#include "common/scope_exit.h" + +#include "video_core/vulkan_common/nsight_aftermath_tracker.h" + +namespace Vulkan { + +static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll"; + +NsightAftermathTracker::NsightAftermathTracker() { + if (!dl.Open(AFTERMATH_LIB_NAME)) { + LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL"); + return; + } + if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps", + &GFSDK_Aftermath_DisableGpuCrashDumps) || + !dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps", + &GFSDK_Aftermath_EnableGpuCrashDumps) || + !dl.GetSymbol("GFSDK_Aftermath_GetShaderDebugInfoIdentifier", + &GFSDK_Aftermath_GetShaderDebugInfoIdentifier) || + !dl.GetSymbol("GFSDK_Aftermath_GetShaderHashSpirv", &GFSDK_Aftermath_GetShaderHashSpirv) || + !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_CreateDecoder", + &GFSDK_Aftermath_GpuCrashDump_CreateDecoder) || + !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_DestroyDecoder", + &GFSDK_Aftermath_GpuCrashDump_DestroyDecoder) || + !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GenerateJSON", + &GFSDK_Aftermath_GpuCrashDump_GenerateJSON) || + !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GetJSON", + &GFSDK_Aftermath_GpuCrashDump_GetJSON)) { + LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers"); + return; // initialized stays false: SaveShader() and the destructor become no-ops. + } + dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash"; + + void(Common::FS::DeleteDirRecursively(dump_dir)); + if (!Common::FS::CreateDir(dump_dir)) { + LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory"); + return; + } + if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps( + 
GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan, + GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback, + ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) { + LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed"); + return; + } + LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir); + initialized = true; +} + +NsightAftermathTracker::~NsightAftermathTracker() { + if (initialized) { + (void)GFSDK_Aftermath_DisableGpuCrashDumps(); + } +} + +void NsightAftermathTracker::SaveShader(const std::vector& spirv) const { + if (!initialized) { + return; + } + + std::vector spirv_copy = spirv; + GFSDK_Aftermath_SpirvCode shader; + shader.pData = spirv_copy.data(); + shader.size = static_cast(spirv_copy.size() * 4); + + std::scoped_lock lock{mutex}; + + GFSDK_Aftermath_ShaderHash hash; + if (!GFSDK_Aftermath_SUCCEED( + GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &hash))) { + LOG_ERROR(Render_Vulkan, "Failed to hash SPIR-V module"); + return; + } + + Common::FS::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb"); + if (!file.IsOpen()) { + LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); + return; + } + if (file.WriteArray(spirv.data(), spirv.size()) != spirv.size()) { + LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash); + return; + } +} + +void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump, + u32 gpu_crash_dump_size) { + std::scoped_lock lock{mutex}; + + LOG_CRITICAL(Render_Vulkan, "called"); + + GFSDK_Aftermath_GpuCrashDump_Decoder decoder; + if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_CreateDecoder( + GFSDK_Aftermath_Version_API, gpu_crash_dump, gpu_crash_dump_size, &decoder))) { + LOG_ERROR(Render_Vulkan, "Failed to create decoder"); + return; + } + SCOPE_EXIT({ 
GFSDK_Aftermath_GpuCrashDump_DestroyDecoder(decoder); }); + + u32 json_size = 0; + if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_GenerateJSON( + decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO, + GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, nullptr, + this, &json_size))) { + LOG_ERROR(Render_Vulkan, "Failed to generate JSON"); + return; + } + std::vector json(json_size); + if (!GFSDK_Aftermath_SUCCEED( + GFSDK_Aftermath_GpuCrashDump_GetJSON(decoder, json_size, json.data()))) { + LOG_ERROR(Render_Vulkan, "Failed to query JSON"); + return; + } + + const std::string base_name = [this] { + const int id = dump_id++; + if (id == 0) { + return fmt::format("{}/crash.nv-gpudmp", dump_dir); + } else { + return fmt::format("{}/crash_{}.nv-gpudmp", dump_dir, id); + } + }(); + + std::string_view dump_view(static_cast(gpu_crash_dump), gpu_crash_dump_size); + if (Common::FS::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) { + LOG_ERROR(Render_Vulkan, "Failed to write dump file"); + return; + } + const std::string_view json_view(json.data(), json.size()); + if (Common::FS::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) { + LOG_ERROR(Render_Vulkan, "Failed to write JSON"); + return; + } +} + +void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_info, + u32 shader_debug_info_size) { + std::scoped_lock lock{mutex}; + + GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier; + if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GetShaderDebugInfoIdentifier( + GFSDK_Aftermath_Version_API, shader_debug_info, shader_debug_info_size, &identifier))) { + LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_GetShaderDebugInfoIdentifier failed"); + return; + } + + const std::string path = + fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]); + Common::FS::IOFile file(path, "wb"); + if (!file.IsOpen()) { + LOG_ERROR(Render_Vulkan, "Failed to 
create file {}", path); + return; + } + if (file.WriteBytes(static_cast(shader_debug_info), shader_debug_info_size) != + shader_debug_info_size) { + LOG_ERROR(Render_Vulkan, "Failed to write file {}", path); + return; + } +} + +void NsightAftermathTracker::OnCrashDumpDescriptionCallback( + PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description) { + add_description(GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName, "yuzu"); +} + +void NsightAftermathTracker::GpuCrashDumpCallback(const void* gpu_crash_dump, + u32 gpu_crash_dump_size, void* user_data) { + static_cast(user_data)->OnGpuCrashDumpCallback(gpu_crash_dump, + gpu_crash_dump_size); +} + +void NsightAftermathTracker::ShaderDebugInfoCallback(const void* shader_debug_info, + u32 shader_debug_info_size, void* user_data) { + static_cast(user_data)->OnShaderDebugInfoCallback( + shader_debug_info, shader_debug_info_size); +} + +void NsightAftermathTracker::CrashDumpDescriptionCallback( + PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data) { + static_cast(user_data)->OnCrashDumpDescriptionCallback( + add_description); +} + +} // namespace Vulkan + +#endif // HAS_NSIGHT_AFTERMATH diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h new file mode 100644 index 000000000..cee3847fb --- /dev/null +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h @@ -0,0 +1,82 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include + +#define VK_NO_PROTOTYPES +#include + +#ifdef HAS_NSIGHT_AFTERMATH +#include +#include +#include +#endif + +#include "common/common_types.h" +#include "common/dynamic_library.h" + +namespace Vulkan { + +class NsightAftermathTracker { +public: + NsightAftermathTracker(); + ~NsightAftermathTracker(); + + NsightAftermathTracker(const NsightAftermathTracker&) = delete; + NsightAftermathTracker& operator=(const NsightAftermathTracker&) = delete; + + // Delete move semantics because Aftermath initialization uses a pointer to this. + NsightAftermathTracker(NsightAftermathTracker&&) = delete; + NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; + + void SaveShader(const std::vector& spirv) const; + +private: +#ifdef HAS_NSIGHT_AFTERMATH + static void GpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size, + void* user_data); + + static void ShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size, + void* user_data); + + static void CrashDumpDescriptionCallback( + PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data); + + void OnGpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size); + + void OnShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size); + + void OnCrashDumpDescriptionCallback( + PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description); + + mutable std::mutex mutex; + + std::string dump_dir; + int dump_id = 0; + + bool initialized = false; + + Common::DynamicLibrary dl; + PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps; + PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps; + PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier; + PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv; + PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder 
GFSDK_Aftermath_GpuCrashDump_CreateDecoder; + PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder; + PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON; + PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON; +#endif +}; + +#ifndef HAS_NSIGHT_AFTERMATH +inline NsightAftermathTracker::NsightAftermathTracker() = default; +inline NsightAftermathTracker::~NsightAftermathTracker() = default; +inline void NsightAftermathTracker::SaveShader(const std::vector&) const {} +#endif + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 67183eed8..f300f22c9 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -13,6 +13,7 @@ #include "common/assert.h" #include "core/settings.h" +#include "video_core/vulkan_common/nsight_aftermath_tracker.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -412,7 +413,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; if (nv_device_diagnostics_config) { - nsight_aftermath_tracker.Initialize(); + nsight_aftermath_tracker = std::make_unique(); diagnostics_nv = { .sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV, @@ -491,7 +492,9 @@ void Device::ReportLoss() const { } void Device::SaveShader(const std::vector& spirv) const { - nsight_aftermath_tracker.SaveShader(spirv); + if (nsight_aftermath_tracker) { + nsight_aftermath_tracker->SaveShader(spirv); + } } bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index b2651e049..a973c3ce4 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ 
b/src/video_core/vulkan_common/vulkan_device.h @@ -10,11 +10,12 @@ #include #include "common/common_types.h" -#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { +class NsightAftermathTracker; + /// Format usage descriptor. enum class FormatType { Linear, Optimal, Buffer }; @@ -300,7 +301,7 @@ private: std::unordered_map format_properties; /// Nsight Aftermath GPU crash tracker - NsightAftermathTracker nsight_aftermath_tracker; + std::unique_ptr nsight_aftermath_tracker; }; } // namespace Vulkan -- cgit v1.2.3 From ac1e4734c2998e764e4116592510690ee2e8af11 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Dec 2020 01:32:46 -0300 Subject: vulkan_device: Allow creating a device without surface --- src/video_core/vulkan_common/vulkan_device.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f300f22c9..75173324e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -775,7 +775,7 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { std::optional graphics; std::optional present; for (u32 index = 0; index < static_cast(queue_family_properties.size()); ++index) { - if (graphics && present) { + if (graphics && (present || !surface)) { break; } const VkQueueFamilyProperties& queue_family = queue_family_properties[index]; @@ -785,7 +785,7 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) { graphics = index; } - if (physical.GetSurfaceSupportKHR(index, surface)) { + if (surface && physical.GetSurfaceSupportKHR(index, surface)) { present = index; } } @@ -793,7 +793,7 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { LOG_ERROR(Render_Vulkan, "Device lacks a graphics queue"); throw 
vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } - if (!present) { + if (surface && !present) { LOG_ERROR(Render_Vulkan, "Device lacks a present queue"); throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } -- cgit v1.2.3 From 1ccf80536739fb9a193a0a148dcd4f2deca1bfd4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 4 Jan 2021 17:56:39 -0300 Subject: vk_rasterizer: Skip binding empty descriptor sets on compute Fixes unit tests where compute shaders had no descriptors in the set, making Vulkan drivers crash when binding an empty set. --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 1c174e7ec..b6a1c5bbb 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -628,8 +628,10 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, descriptor_set](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, DESCRIPTOR_SET, - descriptor_set, {}); + if (descriptor_set) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, + DESCRIPTOR_SET, descriptor_set, nullptr); + } cmdbuf.Dispatch(grid_x, grid_y, grid_z); }); } -- cgit v1.2.3 From aace20afc73d4d51ec91cdb2e71eb76a425df9a0 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 7 Jan 2021 16:51:34 +0000 Subject: texture_cache: Replace PAGE_SHIFT with PAGE_BITS PAGE_SHIFT is a #define in system headers that leaks into user code on some systems --- src/video_core/texture_cache/texture_cache.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/texture_cache/texture_cache.h 
b/src/video_core/texture_cache/texture_cache.h index ad86c50b4..d1080300f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -61,7 +61,7 @@ using VideoCore::Surface::SurfaceType; template class TextureCache { /// Address shift for caching images into a hash table - static constexpr u64 PAGE_SHIFT = 20; + static constexpr u64 PAGE_BITS = 20; /// Enables debugging features to the texture cache static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; @@ -184,8 +184,8 @@ private: template static void ForEachPage(VAddr addr, size_t size, Func&& func) { static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; - const u64 page_end = (addr + size - 1) >> PAGE_SHIFT; - for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) { + const u64 page_end = (addr + size - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { if constexpr (RETURNS_BOOL) { if (func(page)) { break; @@ -708,7 +708,7 @@ void TextureCache

::InvalidateDepthBuffer() { template typename P::ImageView* TextureCache

::TryFindFramebufferImageView(VAddr cpu_addr) { // TODO: Properly implement this - const auto it = page_table.find(cpu_addr >> PAGE_SHIFT); + const auto it = page_table.find(cpu_addr >> PAGE_BITS); if (it == page_table.end()) { return nullptr; } @@ -1170,13 +1170,13 @@ void TextureCache

::UnregisterImage(ImageId image_id) { ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { const auto page_it = page_table.find(page); if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_SHIFT); + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); return; } std::vector& image_ids = page_it->second; const auto vector_it = std::ranges::find(image_ids, image_id); if (vector_it == image_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_SHIFT); + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS); return; } image_ids.erase(vector_it); -- cgit v1.2.3 From 21199cb9657096ee546413164110e79f33def6a8 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 7 Jan 2021 17:37:47 +0000 Subject: vulkan_library: Common::DynamicLibrary::Open is [[nodiscard]] Ignore the return value on __APPLE__ systems as well --- src/video_core/vulkan_common/vulkan_library.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_library.cpp b/src/video_core/vulkan_common/vulkan_library.cpp index 27c958221..557871d81 100644 --- a/src/video_core/vulkan_common/vulkan_library.cpp +++ b/src/video_core/vulkan_common/vulkan_library.cpp @@ -20,7 +20,7 @@ Common::DynamicLibrary OpenLibrary() { // Use the libvulkan.dylib from the application bundle. 
const std::string filename = Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; - library.Open(filename.c_str()); + void(library.Open(filename.c_str())); } #else std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1); -- cgit v1.2.3 From 2c27127d04a155fe0f893e84263d58f14473785d Mon Sep 17 00:00:00 2001 From: ameerj Date: Mon, 28 Dec 2020 01:02:06 -0500 Subject: nvdec syncpt incorporation laying the groundwork for async gpu, although this does not fully implement async nvdec operations --- src/video_core/cdma_pusher.cpp | 15 +++++++-------- src/video_core/cdma_pusher.h | 10 ++++------ src/video_core/command_classes/host1x.cpp | 6 ++++-- src/video_core/command_classes/sync_manager.cpp | 2 +- 4 files changed, 16 insertions(+), 17 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index e3e7432f7..94679d5d1 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -33,8 +33,7 @@ CDmaPusher::CDmaPusher(GPU& gpu_) : gpu{gpu_}, nvdec_processor(std::make_shared(gpu)), vic_processor(std::make_unique(gpu, nvdec_processor)), host1x_processor(std::make_unique(gpu)), - nvdec_sync(std::make_unique(gpu)), - vic_sync(std::make_unique(gpu)) {} + sync_manager(std::make_unique(gpu)) {} CDmaPusher::~CDmaPusher() = default; @@ -110,10 +109,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { const auto syncpoint_id = static_cast(data & 0xFF); const auto cond = static_cast((data >> 8) & 0xFF); if (cond == 0) { - nvdec_sync->Increment(syncpoint_id); + sync_manager->Increment(syncpoint_id); } else { - nvdec_sync->IncrementWhenDone(static_cast(current_class), syncpoint_id); - nvdec_sync->SignalDone(syncpoint_id); + sync_manager->SignalDone( + sync_manager->IncrementWhenDone(static_cast(current_class), syncpoint_id)); } break; } @@ -135,10 +134,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { const auto 
syncpoint_id = static_cast(data & 0xFF); const auto cond = static_cast((data >> 8) & 0xFF); if (cond == 0) { - vic_sync->Increment(syncpoint_id); + sync_manager->Increment(syncpoint_id); } else { - vic_sync->IncrementWhenDone(static_cast(current_class), syncpoint_id); - vic_sync->SignalDone(syncpoint_id); + sync_manager->SignalDone( + sync_manager->IncrementWhenDone(static_cast(current_class), syncpoint_id)); } break; } diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 0db1cd646..8ca70b6dd 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h @@ -116,12 +116,10 @@ private: void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector& arguments); GPU& gpu; - - std::shared_ptr nvdec_processor; - std::unique_ptr vic_processor; - std::unique_ptr host1x_processor; - std::unique_ptr nvdec_sync; - std::unique_ptr vic_sync; + std::shared_ptr nvdec_processor; + std::unique_ptr vic_processor; + std::unique_ptr host1x_processor; + std::unique_ptr sync_manager; ChClassId current_class{}; ThiRegisters vic_thi_state{}; ThiRegisters nvdec_thi_state{}; diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp index c4dd4881a..9d0a1b4d9 100644 --- a/src/video_core/command_classes/host1x.cpp +++ b/src/video_core/command_classes/host1x.cpp @@ -34,6 +34,8 @@ void Tegra::Host1x::ProcessMethod(Method method, const std::vector& argumen } void Tegra::Host1x::Execute(u32 data) { - // This method waits on a valid syncpoint. 
- // TODO: Implement when proper Async is in place + u32 syncpointId = (data & 0xFF); + u32 threshold = state.load_syncpoint_payload32; + + gpu.WaitFence(syncpointId, threshold); } diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp index 19dc9e0ab..579857766 100644 --- a/src/video_core/command_classes/sync_manager.cpp +++ b/src/video_core/command_classes/sync_manager.cpp @@ -38,7 +38,7 @@ u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) { } void SyncptIncrManager::SignalDone(u32 handle) { - const auto done_incr = + const auto& done_incr = std::find_if(increments.begin(), increments.end(), [handle](const SyncptIncr& incr) { return incr.id == handle; }); if (done_incr != increments.cend()) { -- cgit v1.2.3 From 06cef3355e415be83db3bc6d19b022de0b977580 Mon Sep 17 00:00:00 2001 From: ameerj Date: Mon, 28 Dec 2020 01:21:41 -0500 Subject: fix for nvdec disabled, cleanup host1x --- src/video_core/command_classes/host1x.cpp | 21 ++++--------- src/video_core/command_classes/host1x.h | 49 +++---------------------------- 2 files changed, 9 insertions(+), 61 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp index 9d0a1b4d9..b12494528 100644 --- a/src/video_core/command_classes/host1x.cpp +++ b/src/video_core/command_classes/host1x.cpp @@ -10,22 +10,14 @@ Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {} Tegra::Host1x::~Host1x() = default; -void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) { - u8* const state_offset = reinterpret_cast(&state) + offset * sizeof(u32); - std::memcpy(state_offset, &arguments, sizeof(u32)); -} - -void Tegra::Host1x::ProcessMethod(Method method, const std::vector& arguments) { - StateWrite(static_cast(method), arguments[0]); +void Tegra::Host1x::ProcessMethod(Method method, u32 argument) { switch (method) { - case Method::WaitSyncpt: - Execute(arguments[0]); - break; case 
Method::LoadSyncptPayload32: - syncpoint_value = arguments[0]; + syncpoint_value = argument; break; + case Method::WaitSyncpt: case Method::WaitSyncpt32: - Execute(arguments[0]); + Execute(argument); break; default: UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast(method)); @@ -34,8 +26,5 @@ void Tegra::Host1x::ProcessMethod(Method method, const std::vector& argumen } void Tegra::Host1x::Execute(u32 data) { - u32 syncpointId = (data & 0xFF); - u32 threshold = state.load_syncpoint_payload32; - - gpu.WaitFence(syncpointId, threshold); + gpu.WaitFence(data, syncpoint_value); } diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h index 013eaa0c1..7e94799dd 100644 --- a/src/video_core/command_classes/host1x.h +++ b/src/video_core/command_classes/host1x.h @@ -14,64 +14,23 @@ class Nvdec; class Host1x { public: - struct Host1xClassRegisters { - u32 incr_syncpt{}; - u32 incr_syncpt_ctrl{}; - u32 incr_syncpt_error{}; - INSERT_PADDING_WORDS(5); - u32 wait_syncpt{}; - u32 wait_syncpt_base{}; - u32 wait_syncpt_incr{}; - u32 load_syncpt_base{}; - u32 incr_syncpt_base{}; - u32 clear{}; - u32 wait{}; - u32 wait_with_interrupt{}; - u32 delay_use{}; - u32 tick_count_high{}; - u32 tick_count_low{}; - u32 tick_ctrl{}; - INSERT_PADDING_WORDS(23); - u32 ind_ctrl{}; - u32 ind_off2{}; - u32 ind_off{}; - std::array ind_data{}; - INSERT_PADDING_WORDS(1); - u32 load_syncpoint_payload32{}; - u32 stall_ctrl{}; - u32 wait_syncpt32{}; - u32 wait_syncpt_base32{}; - u32 load_syncpt_base32{}; - u32 incr_syncpt_base32{}; - u32 stall_count_high{}; - u32 stall_count_low{}; - u32 xref_ctrl{}; - u32 channel_xref_high{}; - u32 channel_xref_low{}; - }; - static_assert(sizeof(Host1xClassRegisters) == 0x164, "Host1xClassRegisters is an invalid size"); - enum class Method : u32 { - WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4, - LoadSyncptPayload32 = offsetof(Host1xClassRegisters, load_syncpoint_payload32) / 4, - WaitSyncpt32 = 
offsetof(Host1xClassRegisters, wait_syncpt32) / 4, + WaitSyncpt = 0x8, + LoadSyncptPayload32 = 0x4e, + WaitSyncpt32 = 0x50, }; explicit Host1x(GPU& gpu); ~Host1x(); /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Method method, const std::vector& arguments); + void ProcessMethod(Method method, u32 argument); private: /// For Host1x, execute is waiting on a syncpoint previously written into the state void Execute(u32 data); - /// Write argument into the provided offset - void StateWrite(u32 offset, u32 arguments); - u32 syncpoint_value{}; - Host1xClassRegisters state{}; GPU& gpu; }; -- cgit v1.2.3 From 16392a23cc864ef0fa8a768584fbcc64fec40f2a Mon Sep 17 00:00:00 2001 From: Ameer J Date: Mon, 28 Dec 2020 13:53:28 -0500 Subject: remove inaccurate reference Co-authored-by: LC --- src/video_core/command_classes/sync_manager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp index 579857766..19dc9e0ab 100644 --- a/src/video_core/command_classes/sync_manager.cpp +++ b/src/video_core/command_classes/sync_manager.cpp @@ -38,7 +38,7 @@ u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) { } void SyncptIncrManager::SignalDone(u32 handle) { - const auto& done_incr = + const auto done_incr = std::find_if(increments.begin(), increments.end(), [handle](const SyncptIncr& incr) { return incr.id == handle; }); if (done_incr != increments.cend()) { -- cgit v1.2.3