Diffstat (limited to 'src')
173 files changed, 10965 insertions(+), 8003 deletions(-)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a22b564d6..8777df751 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -62,6 +62,7 @@ else()
         -Werror=implicit-fallthrough
         -Werror=missing-declarations
         -Werror=reorder
+        -Werror=uninitialized
         -Werror=unused-result
         -Wextra
         -Wmissing-declarations
diff --git a/src/common/concepts.h b/src/common/concepts.h
index 5bef3ad67..aa08065a7 100644
--- a/src/common/concepts.h
+++ b/src/common/concepts.h
@@ -31,4 +31,8 @@ concept DerivedFrom = requires {
     std::is_convertible_v<const volatile Derived*, const volatile Base*>;
 };
 
+// TODO: Replace with std::convertible_to when libc++ implements it.
+template <typename From, typename To>
+concept ConvertibleTo = std::is_convertible_v<From, To>;
+
 } // namespace Common
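The new Common::ConvertibleTo mirrors std::convertible_to closely enough for the uses below; a minimal sketch of the compound-requirement pattern it is designed for follows (the ReturnsU64 concept and Sample type are hypothetical illustrations, not part of the change):

    #include <type_traits>

    namespace Common {
    template <typename From, typename To>
    concept ConvertibleTo = std::is_convertible_v<From, To>;
    } // namespace Common

    // In "{ expr } -> Concept<B>", the compiler checks Concept<decltype((expr)), B>,
    // which is why a two-parameter concept slots in where std::convertible_to did.
    template <typename T>
    concept ReturnsU64 = requires(T& t) {
        { t.Get() } -> Common::ConvertibleTo<unsigned long long>;
    };

    struct Sample {
        unsigned long long Get() const { return 0; }
    };
    static_assert(ReturnsU64<Sample>);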
diff --git a/src/core/hle/kernel/k_priority_queue.h b/src/core/hle/kernel/k_priority_queue.h
index 01a577d0c..99fb8fe93 100644
--- a/src/core/hle/kernel/k_priority_queue.h
+++ b/src/core/hle/kernel/k_priority_queue.h
@@ -8,11 +8,13 @@
 #pragma once
 
 #include <array>
+#include <concepts>
 
 #include "common/assert.h"
 #include "common/bit_set.h"
 #include "common/bit_util.h"
 #include "common/common_types.h"
+#include "common/concepts.h"
 
 namespace Kernel {
 
@@ -21,7 +23,7 @@ class Thread;
 template <typename T>
 concept KPriorityQueueAffinityMask = !std::is_reference_v<T> && requires(T & t) {
     { t.GetAffinityMask() }
-    ->std::convertible_to<u64>;
+    ->Common::ConvertibleTo<u64>;
     {t.SetAffinityMask(std::declval<u64>())};
 
     { t.GetAffinity(std::declval<int32_t>()) }
@@ -48,9 +50,9 @@ concept KPriorityQueueMember = !std::is_reference_v<T> && requires(T & t) {
     ->KPriorityQueueAffinityMask;
 
     { t.GetActiveCore() }
-    ->std::convertible_to<s32>;
+    ->Common::ConvertibleTo<s32>;
     { t.GetPriority() }
-    ->std::convertible_to<s32>;
+    ->Common::ConvertibleTo<s32>;
 };
 
 template <typename Member, size_t _NumCores, int LowestPriority, int HighestPriority>
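To see what these constraints buy, here is a reduced stand-in for KPriorityQueueMember covering only the two scheduler accessors touched in this hunk; the real concept also checks affinity and queue-entry members, and the type names below are hypothetical:

    #include <type_traits>

    template <typename From, typename To>
    concept ConvertibleTo = std::is_convertible_v<From, To>;

    template <typename T>
    concept SchedulerMember = !std::is_reference_v<T> && requires(T& t) {
        { t.GetActiveCore() } -> ConvertibleTo<int>;
        { t.GetPriority() } -> ConvertibleTo<int>;
    };

    struct GoodThread {
        int GetActiveCore() const { return 0; }
        int GetPriority() const { return 44; }
    };
    struct BadThread {}; // missing both accessors

    static_assert(SchedulerMember<GoodThread>);
    static_assert(!SchedulerMember<BadThread>);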
diff --git a/src/core/hle/kernel/k_scheduler_lock.h b/src/core/hle/kernel/k_scheduler_lock.h
index 2d675b39e..2f1c1f691 100644
--- a/src/core/hle/kernel/k_scheduler_lock.h
+++ b/src/core/hle/kernel/k_scheduler_lock.h
@@ -10,6 +10,7 @@
 #include "common/assert.h"
 #include "common/spin_lock.h"
 #include "core/hardware_properties.h"
+#include "core/hle/kernel/kernel.h"
 
 namespace Kernel {
 
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 2d225392f..de3ed25da 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1583,7 +1583,7 @@ static void ExitThread32(Core::System& system) {
 
 /// Sleep the current thread
 static void SleepThread(Core::System& system, s64 nanoseconds) {
-    LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds);
+    LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
 
     enum class SleepType : s64 {
         YieldWithoutCoreMigration = 0,
diff --git a/src/core/hle/service/pcie/pcie.cpp b/src/core/hle/service/pcie/pcie.cpp
index 80c0fc7ac..f6686fc4d 100644
--- a/src/core/hle/service/pcie/pcie.cpp
+++ b/src/core/hle/service/pcie/pcie.cpp
@@ -48,7 +48,7 @@ public:
 
 class PCIe final : public ServiceFramework<PCIe> {
 public:
-    explicit PCIe(Core::System& system_) : ServiceFramework{system, "pcie"} {
+    explicit PCIe(Core::System& system_) : ServiceFramework{system_, "pcie"} {
         // clang-format off
         static const FunctionInfo functions[] = {
             {0, nullptr, "RegisterClassDriver"},
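The one-character fix above (system to system_) guards against a classic shadowing pitfall: with a trailing-underscore parameter convention, the un-suffixed name can silently resolve to another entity visible in scope instead of the constructor argument. A hypothetical reduction:

    struct ServiceFramework {
        ServiceFramework(int system, const char* name) {}
    };

    int system = 42; // some other visible entity (hypothetical)

    struct PCIe : ServiceFramework {
        explicit PCIe(int system_) : ServiceFramework{system_, "pcie"} {} // was {system, "pcie"}
    };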
diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp
index 17a9225d7..412d57896 100644
--- a/src/input_common/udp/client.cpp
+++ b/src/input_common/udp/client.cpp
@@ -225,6 +225,11 @@ void Client::OnPortInfo([[maybe_unused]] Response::PortInfo data) {
 }
 
 void Client::OnPadData(Response::PadData data, std::size_t client) {
+    // Accept packets only for the correct pad
+    if (static_cast<u8>(clients[client].pad_index) != data.info.id) {
+        return;
+    }
+
     LOG_TRACE(Input, "PadData packet received");
     if (data.packet_counter == clients[client].packet_sequence) {
         LOG_WARNING(
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 4111ce8f7..e050f9aed 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -25,6 +25,7 @@ add_library(video_core STATIC
     command_classes/vic.h
     compatible_formats.cpp
     compatible_formats.h
+    delayed_destruction_ring.h
    dirty_flags.cpp
     dirty_flags.h
     dma_pusher.cpp
@@ -81,14 +82,10 @@ add_library(video_core STATIC
     renderer_opengl/gl_device.h
     renderer_opengl/gl_fence_manager.cpp
     renderer_opengl/gl_fence_manager.h
-    renderer_opengl/gl_framebuffer_cache.cpp
-    renderer_opengl/gl_framebuffer_cache.h
     renderer_opengl/gl_rasterizer.cpp
     renderer_opengl/gl_rasterizer.h
     renderer_opengl/gl_resource_manager.cpp
     renderer_opengl/gl_resource_manager.h
-    renderer_opengl/gl_sampler_cache.cpp
-    renderer_opengl/gl_sampler_cache.h
     renderer_opengl/gl_shader_cache.cpp
     renderer_opengl/gl_shader_cache.h
     renderer_opengl/gl_shader_decompiler.cpp
@@ -110,8 +107,10 @@ add_library(video_core STATIC
     renderer_opengl/maxwell_to_gl.h
     renderer_opengl/renderer_opengl.cpp
     renderer_opengl/renderer_opengl.h
-    renderer_opengl/utils.cpp
-    renderer_opengl/utils.h
+    renderer_opengl/util_shaders.cpp
+    renderer_opengl/util_shaders.h
+    renderer_vulkan/blit_image.cpp
+    renderer_vulkan/blit_image.h
     renderer_vulkan/fixed_pipeline_state.cpp
     renderer_vulkan/fixed_pipeline_state.h
     renderer_vulkan/maxwell_to_vk.cpp
@@ -138,8 +137,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_fence_manager.h
     renderer_vulkan/vk_graphics_pipeline.cpp
     renderer_vulkan/vk_graphics_pipeline.h
-    renderer_vulkan/vk_image.cpp
-    renderer_vulkan/vk_image.h
     renderer_vulkan/vk_master_semaphore.cpp
     renderer_vulkan/vk_master_semaphore.h
     renderer_vulkan/vk_memory_manager.cpp
@@ -150,12 +147,8 @@ add_library(video_core STATIC
     renderer_vulkan/vk_query_cache.h
     renderer_vulkan/vk_rasterizer.cpp
     renderer_vulkan/vk_rasterizer.h
-    renderer_vulkan/vk_renderpass_cache.cpp
-    renderer_vulkan/vk_renderpass_cache.h
     renderer_vulkan/vk_resource_pool.cpp
     renderer_vulkan/vk_resource_pool.h
-    renderer_vulkan/vk_sampler_cache.cpp
-    renderer_vulkan/vk_sampler_cache.h
     renderer_vulkan/vk_scheduler.cpp
     renderer_vulkan/vk_scheduler.h
     renderer_vulkan/vk_shader_decompiler.cpp
@@ -176,8 +169,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_update_descriptor.h
     renderer_vulkan/wrapper.cpp
     renderer_vulkan/wrapper.h
-    sampler_cache.cpp
-    sampler_cache.h
     shader_cache.h
     shader_notify.cpp
     shader_notify.h
@@ -234,19 +225,32 @@ add_library(video_core STATIC
     shader/transform_feedback.h
     surface.cpp
     surface.h
+    texture_cache/accelerated_swizzle.cpp
+    texture_cache/accelerated_swizzle.h
+    texture_cache/decode_bc4.cpp
+    texture_cache/decode_bc4.h
+    texture_cache/descriptor_table.h
+    texture_cache/formatter.cpp
+    texture_cache/formatter.h
     texture_cache/format_lookup_table.cpp
     texture_cache/format_lookup_table.h
-    texture_cache/surface_base.cpp
-    texture_cache/surface_base.h
-    texture_cache/surface_params.cpp
-    texture_cache/surface_params.h
-    texture_cache/surface_view.cpp
-    texture_cache/surface_view.h
+    texture_cache/image_base.cpp
+    texture_cache/image_base.h
+    texture_cache/image_info.cpp
+    texture_cache/image_info.h
+    texture_cache/image_view_base.cpp
+    texture_cache/image_view_base.h
+    texture_cache/image_view_info.cpp
+    texture_cache/image_view_info.h
+    texture_cache/render_targets.h
+    texture_cache/samples_helper.h
+    texture_cache/slot_vector.h
     texture_cache/texture_cache.h
+    texture_cache/types.h
+    texture_cache/util.cpp
+    texture_cache/util.h
     textures/astc.cpp
     textures/astc.h
-    textures/convert.cpp
-    textures/convert.h
     textures/decoders.cpp
     textures/decoders.h
     textures/texture.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 38961f3fd..83b9ee871 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -118,20 +118,17 @@ public:
     /// Prepares the buffer cache for data uploading
     /// @param max_size Maximum number of bytes that will be uploaded
     /// @return True when a stream buffer invalidation was required, false otherwise
-    bool Map(std::size_t max_size) {
+    void Map(std::size_t max_size) {
         std::lock_guard lock{mutex};
 
-        bool invalidated;
-        std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
+        std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4);
         buffer_offset = buffer_offset_base;
-
-        return invalidated;
     }
 
     /// Finishes the upload stream
     void Unmap() {
         std::lock_guard lock{mutex};
-        stream_buffer->Unmap(buffer_offset - buffer_offset_base);
+        stream_buffer.Unmap(buffer_offset - buffer_offset_base);
     }
 
     /// Function called at the end of each frame, inteded for deferred operations
@@ -261,9 +258,9 @@ public:
 protected:
     explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
                          Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                         std::unique_ptr<StreamBuffer> stream_buffer_)
+                         StreamBuffer& stream_buffer_)
         : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
-          stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {}
+          stream_buffer{stream_buffer_} {}
 
     ~BufferCache() = default;
 
@@ -441,7 +438,7 @@ private:
 
         buffer_ptr += size;
         buffer_offset += size;
-        return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
+        return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()};
     }
 
     void AlignBuffer(std::size_t alignment) {
@@ -567,9 +564,7 @@ private:
     VideoCore::RasterizerInterface& rasterizer;
     Tegra::MemoryManager& gpu_memory;
     Core::Memory::Memory& cpu_memory;
-
-    std::unique_ptr<StreamBuffer> stream_buffer;
-    BufferType stream_buffer_handle;
+    StreamBuffer& stream_buffer;
 
     u8* buffer_ptr = nullptr;
     u64 buffer_offset = 0;
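The Map/Unmap protocol that survives this refactor is an offset-delta scheme: Map hands out a write cursor plus its base offset, and Unmap is told only how many bytes were actually consumed. A standalone sketch under simplified assumptions (no wrapping or invalidation; all names hypothetical):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <utility>
    #include <vector>

    class TinyStreamBuffer {
    public:
        std::pair<uint8_t*, size_t> Map(size_t max_size, size_t alignment) {
            offset = (offset + alignment - 1) & ~(alignment - 1); // like AlignBuffer
            (void)max_size; // a real implementation would wrap or invalidate on overflow
            return {storage.data() + offset, offset};
        }
        void Unmap(size_t used_bytes) {
            offset += used_bytes; // commit only what the caller wrote
        }

    private:
        std::vector<uint8_t> storage = std::vector<uint8_t>(1 << 20);
        size_t offset = 0;
    };

    int main() {
        TinyStreamBuffer stream;
        auto [ptr, base] = stream.Map(256, 4);
        (void)base; // plays the role of buffer_offset_base in the cache
        const uint32_t value = 0xCAFE;
        std::memcpy(ptr, &value, sizeof(value));
        stream.Unmap(sizeof(value)); // buffer_offset - buffer_offset_base bytes
    }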
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 66e21ce9c..aa8c9f9de 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -9,7 +9,7 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
-#include "video_core/texture_cache/surface_params.h"
+#include "video_core/textures/decoders.h"
 
 extern "C" {
 #include <libswscale/swscale.h>
@@ -105,9 +105,9 @@ void Vic::Execute() {
         const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
                                                         block_height, 0);
         std::vector<u8> swizzled_data(size);
-        Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4,
-                                         swizzled_data.data(), converted_frame_buffer.get(),
-                                         false, block_height, 0, 1);
+        Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
+                                       frame->width, 4, swizzled_data.data(),
+                                       converted_frame_buffer.get(), block_height, 0, 0);
 
         gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
         gpu.Maxwell3D().OnMemoryWrite();
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
index b06c32c84..1619d8664 100644
--- a/src/video_core/compatible_formats.cpp
+++ b/src/video_core/compatible_formats.cpp
@@ -3,9 +3,9 @@
 // Refer to the license.txt file included.
 
 #include <array>
-#include <bitset>
 #include <cstddef>
 
+#include "common/common_types.h"
 #include "video_core/compatible_formats.h"
 #include "video_core/surface.h"
 
@@ -13,23 +13,25 @@ namespace VideoCore::Surface {
 
 namespace {
 
+using Table = std::array<std::array<u64, 2>, MaxPixelFormat>;
+
 // Compatibility table taken from Table 3.X.2 in:
 // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
 
-constexpr std::array VIEW_CLASS_128_BITS = {
+constexpr std::array VIEW_CLASS_128_BITS{
     PixelFormat::R32G32B32A32_FLOAT,
     PixelFormat::R32G32B32A32_UINT,
     PixelFormat::R32G32B32A32_SINT,
 };
 
-constexpr std::array VIEW_CLASS_96_BITS = {
+constexpr std::array VIEW_CLASS_96_BITS{
     PixelFormat::R32G32B32_FLOAT,
 };
 // Missing formats:
 // PixelFormat::RGB32UI,
 // PixelFormat::RGB32I,
 
-constexpr std::array VIEW_CLASS_64_BITS = {
+constexpr std::array VIEW_CLASS_64_BITS{
     PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT,
     PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT,
     PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
@@ -38,7 +40,7 @@ constexpr std::array VIEW_CLASS_64_BITS = {
 
 // TODO: How should we handle 48 bits?
 
-constexpr std::array VIEW_CLASS_32_BITS = {
+constexpr std::array VIEW_CLASS_32_BITS{
     PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT,
     PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
     PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
@@ -50,43 +52,105 @@ constexpr std::array VIEW_CLASS_32_BITS = {
 
 // TODO: How should we handle 24 bits?
 
-constexpr std::array VIEW_CLASS_16_BITS = {
+constexpr std::array VIEW_CLASS_16_BITS{
     PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT,
     PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM,
     PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT,
 };
 
-constexpr std::array VIEW_CLASS_8_BITS = {
+constexpr std::array VIEW_CLASS_8_BITS{
     PixelFormat::R8_UINT,
     PixelFormat::R8_UNORM,
     PixelFormat::R8_SINT,
     PixelFormat::R8_SNORM,
 };
 
-constexpr std::array VIEW_CLASS_RGTC1_RED = {
+constexpr std::array VIEW_CLASS_RGTC1_RED{
     PixelFormat::BC4_UNORM,
     PixelFormat::BC4_SNORM,
 };
 
-constexpr std::array VIEW_CLASS_RGTC2_RG = {
+constexpr std::array VIEW_CLASS_RGTC2_RG{
     PixelFormat::BC5_UNORM,
     PixelFormat::BC5_SNORM,
 };
 
-constexpr std::array VIEW_CLASS_BPTC_UNORM = {
+constexpr std::array VIEW_CLASS_BPTC_UNORM{
     PixelFormat::BC7_UNORM,
     PixelFormat::BC7_SRGB,
 };
 
-constexpr std::array VIEW_CLASS_BPTC_FLOAT = {
+constexpr std::array VIEW_CLASS_BPTC_FLOAT{
     PixelFormat::BC6H_SFLOAT,
     PixelFormat::BC6H_UFLOAT,
 };
 
+constexpr std::array VIEW_CLASS_ASTC_4x4_RGBA{
+    PixelFormat::ASTC_2D_4X4_UNORM,
+    PixelFormat::ASTC_2D_4X4_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_5x4_RGBA{
+    PixelFormat::ASTC_2D_5X4_UNORM,
+    PixelFormat::ASTC_2D_5X4_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_5x5_RGBA{
+    PixelFormat::ASTC_2D_5X5_UNORM,
+    PixelFormat::ASTC_2D_5X5_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_6x5_RGBA{
+    PixelFormat::ASTC_2D_6X5_UNORM,
+    PixelFormat::ASTC_2D_6X5_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_6x6_RGBA{
+    PixelFormat::ASTC_2D_6X6_UNORM,
+    PixelFormat::ASTC_2D_6X6_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_8x5_RGBA{
+    PixelFormat::ASTC_2D_8X5_UNORM,
+    PixelFormat::ASTC_2D_8X5_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{
+    PixelFormat::ASTC_2D_8X8_UNORM,
+    PixelFormat::ASTC_2D_8X8_SRGB,
+};
+
+// Missing formats:
+// PixelFormat::ASTC_2D_10X5_UNORM
+// PixelFormat::ASTC_2D_10X5_SRGB
+
+// Missing formats:
+// PixelFormat::ASTC_2D_10X6_UNORM
+// PixelFormat::ASTC_2D_10X6_SRGB
+
+constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{
+    PixelFormat::ASTC_2D_10X8_UNORM,
+    PixelFormat::ASTC_2D_10X8_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{
+    PixelFormat::ASTC_2D_10X10_UNORM,
+    PixelFormat::ASTC_2D_10X10_SRGB,
+};
+
+// Missing formats
+// ASTC_2D_12X10_UNORM,
+// ASTC_2D_12X10_SRGB,
+
+constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{
+    PixelFormat::ASTC_2D_12X12_UNORM,
+    PixelFormat::ASTC_2D_12X12_SRGB,
+};
+
 // Compatibility table taken from Table 4.X.1 in:
 // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
 
-constexpr std::array COPY_CLASS_128_BITS = {
+constexpr std::array COPY_CLASS_128_BITS{
     PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT,
     PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM,
     PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM,
@@ -97,7 +161,7 @@ constexpr std::array COPY_CLASS_128_BITS = {
 // PixelFormat::RGBA32I
 // COMPRESSED_RG_RGTC2
 
-constexpr std::array COPY_CLASS_64_BITS = {
+constexpr std::array COPY_CLASS_64_BITS{
     PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
     PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
     PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT,
@@ -110,32 +174,36 @@ constexpr std::array COPY_CLASS_64_BITS = {
 // COMPRESSED_RGBA_S3TC_DXT1_EXT
 // COMPRESSED_SIGNED_RED_RGTC1
 
-void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) {
-    compatiblity[format_a][format_b] = true;
-    compatiblity[format_b][format_a] = true;
+constexpr void Enable(Table& table, size_t format_a, size_t format_b) {
+    table[format_a][format_b / 64] |= u64(1) << (format_b % 64);
+    table[format_b][format_a / 64] |= u64(1) << (format_a % 64);
 }
 
-void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) {
-    Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
+constexpr void Enable(Table& table, PixelFormat format_a, PixelFormat format_b) {
+    Enable(table, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
 }
 
 template <typename Range>
-void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) {
+constexpr void EnableRange(Table& table, const Range& range) {
     for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {
         for (auto it_b = it_a; it_b != range.end(); ++it_b) {
-            Enable(compatibility, *it_a, *it_b);
+            Enable(table, *it_a, *it_b);
         }
     }
 }
 
-} // Anonymous namespace
+constexpr bool IsSupported(const Table& table, PixelFormat format_a, PixelFormat format_b) {
+    const size_t a = static_cast<size_t>(format_a);
+    const size_t b = static_cast<size_t>(format_b);
+    return ((table[a][b / 64] >> (b % 64)) & 1) != 0;
+}
 
-FormatCompatibility::FormatCompatibility() {
+constexpr Table MakeViewTable() {
+    Table view{};
     for (size_t i = 0; i < MaxPixelFormat; ++i) {
         // Identity is allowed
         Enable(view, i, i);
     }
-
     EnableRange(view, VIEW_CLASS_128_BITS);
     EnableRange(view, VIEW_CLASS_96_BITS);
     EnableRange(view, VIEW_CLASS_64_BITS);
@@ -146,10 +214,36 @@ FormatCompatibility::FormatCompatibility() {
     EnableRange(view, VIEW_CLASS_RGTC2_RG);
     EnableRange(view, VIEW_CLASS_BPTC_UNORM);
     EnableRange(view, VIEW_CLASS_BPTC_FLOAT);
+    EnableRange(view, VIEW_CLASS_ASTC_4x4_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_5x4_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_5x5_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_6x5_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
+    return view;
+}
 
-    copy = view;
+constexpr Table MakeCopyTable() {
+    Table copy = MakeViewTable();
     EnableRange(copy, COPY_CLASS_128_BITS);
     EnableRange(copy, COPY_CLASS_64_BITS);
+    return copy;
+}
+
+} // Anonymous namespace
+
+bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b) {
+    static constexpr Table TABLE = MakeViewTable();
+    return IsSupported(TABLE, format_a, format_b);
+}
+
+bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) {
+    static constexpr Table TABLE = MakeCopyTable();
+    return IsSupported(TABLE, format_a, format_b);
 }
 
 } // namespace VideoCore::Surface
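The replacement for std::bitset packs each row of the compatibility matrix into two u64 words: format_b / 64 selects the word and format_b % 64 the bit, which keeps the whole table buildable at compile time. A self-contained sketch of the same packing (the 100-format count is a made-up stand-in for MaxPixelFormat):

    #include <array>
    #include <cstdint>
    #include <cstdio>

    constexpr size_t kNumFormats = 100; // fits in 2 x 64 bits, like MaxPixelFormat
    using Row = std::array<uint64_t, 2>;
    using Table = std::array<Row, kNumFormats>;

    constexpr void Set(Table& table, size_t a, size_t b) {
        table[a][b / 64] |= uint64_t(1) << (b % 64); // word index, then bit index
    }

    constexpr bool Test(const Table& table, size_t a, size_t b) {
        return ((table[a][b / 64] >> (b % 64)) & 1) != 0;
    }

    int main() {
        Table table{};
        Set(table, 3, 70); // bit 70 lands in word 1, bit 6
        std::printf("%d %d\n", Test(table, 3, 70), Test(table, 3, 71)); // prints: 1 0
    }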
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
index 51766349b..b5eb03bea 100644
--- a/src/video_core/compatible_formats.h
+++ b/src/video_core/compatible_formats.h
@@ -4,31 +4,12 @@
 
 #pragma once
 
-#include <array>
-#include <bitset>
-#include <cstddef>
-
 #include "video_core/surface.h"
 
 namespace VideoCore::Surface {
 
-class FormatCompatibility {
-public:
-    using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>;
-
-    explicit FormatCompatibility();
-
-    bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept {
-        return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
-    }
-
-    bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept {
-        return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
-    }
+bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b);
 
-private:
-    Table view;
-    Table copy;
-};
+bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b);
 
 } // namespace VideoCore::Surface
diff --git a/src/video_core/delayed_destruction_ring.h b/src/video_core/delayed_destruction_ring.h
new file mode 100644
index 000000000..4f1d29c04
--- /dev/null
+++ b/src/video_core/delayed_destruction_ring.h
@@ -0,0 +1,32 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <utility>
+#include <vector>
+
+namespace VideoCommon {
+
+/// Container to push objects to be destroyed a few ticks in the future
+template <typename T, size_t TICKS_TO_DESTROY>
+class DelayedDestructionRing {
+public:
+    void Tick() {
+        index = (index + 1) % TICKS_TO_DESTROY;
+        elements[index].clear();
+    }
+
+    void Push(T&& object) {
+        elements[index].push_back(std::move(object));
+    }
+
+private:
+    size_t index = 0;
+    std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
+};
+
+} // namespace VideoCommon
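A minimal sketch of how the new ring behaves, assuming a hypothetical three-tick instantiation and Resource type: an object pushed now is destroyed when its slot comes around again, i.e. TICKS_TO_DESTROY calls to Tick() later.

    #include <cstdio>
    #include <memory>

    #include "video_core/delayed_destruction_ring.h"

    struct Resource {
        ~Resource() { std::puts("resource destroyed"); }
    };

    int main() {
        VideoCommon::DelayedDestructionRing<std::unique_ptr<Resource>, 3> ring;
        ring.Push(std::make_unique<Resource>());
        ring.Tick(); // still alive
        ring.Tick(); // still alive
        ring.Tick(); // slot reused: the destructor runs here
    }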
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index 2faa6ef0e..b1eaac00c 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -16,6 +16,9 @@ namespace VideoCommon::Dirty {
 using Tegra::Engines::Maxwell3D;
 
 void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
+    FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
+    FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
+
     static constexpr std::size_t num_per_rt = NUM(rt[0]);
     static constexpr std::size_t begin = OFF(rt);
     static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@@ -23,6 +26,10 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
         FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt);
     }
     FillBlock(tables[1], begin, num, RenderTargets);
+    FillBlock(tables[0], OFF(render_area), NUM(render_area), RenderTargets);
+
+    tables[0][OFF(rt_control)] = RenderTargets;
+    tables[1][OFF(rt_control)] = RenderTargetControl;
 
     static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets};
     for (std::size_t i = 0; i < std::size(zeta_flags); ++i) {
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 3f6c1d83a..875527ddd 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -16,7 +16,10 @@ namespace VideoCommon::Dirty {
 enum : u8 {
     NullEntry = 0,
 
+    Descriptors,
+
     RenderTargets,
+    RenderTargetControl,
     ColorBuffer0,
     ColorBuffer1,
     ColorBuffer2,
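These tables implement a two-level dirty-tracking scheme: a register write marks a fine-grained flag (table 0) and optionally a coarser one (table 1), which is how a single rt_control write can now flag both RenderTargetControl and the whole RenderTargets state. A standalone sketch with made-up sizes and register ranges:

    #include <array>
    #include <cstdint>

    constexpr size_t NUM_REGS = 0x300; // hypothetical register count
    using Table = std::array<uint8_t, NUM_REGS>;

    enum : uint8_t { NullEntry = 0, Descriptors, RenderTargets, RenderTargetControl, ColorBuffer0 };

    constexpr void FillBlock(Table& table, size_t begin, size_t num, uint8_t flag) {
        for (size_t i = begin; i < begin + num; ++i) {
            table[i] = flag;
        }
    }

    int main() {
        std::array<Table, 2> tables{};
        FillBlock(tables[0], 0x200, 0x10, ColorBuffer0);  // fine flag: one render target
        FillBlock(tables[1], 0x200, 0x80, RenderTargets); // coarse flag: all render targets
    }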
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 4293d676c..a01d334ad 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,7 +10,11 @@
 
 namespace Tegra::Engines {
 
-Fermi2D::Fermi2D() = default;
+Fermi2D::Fermi2D() {
+    // Nvidia's OpenGL driver seems to assume these values
+    regs.src.depth = 1;
+    regs.dst.depth = 1;
+}
 
 Fermi2D::~Fermi2D() = default;
 
@@ -21,78 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
 void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
     ASSERT_MSG(method < Regs::NUM_REGS,
                "Invalid Fermi2D register, increase the size of the Regs structure");
-
     regs.reg_array[method] = method_argument;
 
-    switch (method) {
-    // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
-    // so trigger on the second 32-bit write.
-    case FERMI2D_REG_INDEX(blit_src_y) + 1: {
-        HandleSurfaceCopy();
-        break;
-    }
+    if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
+        Blit();
     }
 }
 
 void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
-    for (std::size_t i = 0; i < amount; i++) {
-        CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+    for (u32 i = 0; i < amount; ++i) {
+        CallMethod(method, base_start[i], methods_pending - i <= 1);
     }
 }
 
-static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
-    const u32 line_a = src_2 - src_1;
-    const u32 line_b = dst_2 - dst_1;
-    const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
-    return {line_b - (excess * line_b) / line_a, excess};
-}
-
-void Fermi2D::HandleSurfaceCopy() {
-    LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation);
+void Fermi2D::Blit() {
+    LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
+              regs.src.Address(), regs.dst.Address());
 
-    // TODO(Subv): Only raw copies are implemented.
-    ASSERT(regs.operation == Operation::SrcCopy);
+    UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
+    UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
+    UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
+    UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
+    UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
 
-    const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
-    const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
-    u32 src_blit_x2, src_blit_y2;
-    if (regs.blit_control.origin == Origin::Corner) {
-        src_blit_x2 =
-            static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32);
-        src_blit_y2 =
-            static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32);
-    } else {
-        src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
-        src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
-    }
-    u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
-    u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
-    const auto [new_dst_w, src_excess_x] =
-        DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
-    const auto [new_dst_h, src_excess_y] =
-        DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
-    dst_blit_x2 = new_dst_w + regs.blit_dst_x;
-    src_blit_x2 = src_blit_x2 - src_excess_x;
-    dst_blit_y2 = new_dst_h + regs.blit_dst_y;
-    src_blit_y2 = src_blit_y2 - src_excess_y;
-    const auto [new_src_w, dst_excess_x] =
-        DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
-    const auto [new_src_h, dst_excess_y] =
-        DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
-    src_blit_x2 = new_src_w + src_blit_x1;
-    dst_blit_x2 = dst_blit_x2 - dst_excess_x;
-    src_blit_y2 = new_src_h + src_blit_y1;
-    dst_blit_y2 = dst_blit_y2 - dst_excess_y;
-    const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
-    const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
-                                          dst_blit_y2};
-    const Config copy_config{
+    const auto& args = regs.pixels_from_memory;
+    const Config config{
         .operation = regs.operation,
-        .filter = regs.blit_control.filter,
-        .src_rect = src_rect,
-        .dst_rect = dst_rect,
+        .filter = args.sample_mode.filter,
+        .dst_x0 = args.dst_x0,
+        .dst_y0 = args.dst_y0,
+        .dst_x1 = args.dst_x0 + args.dst_width,
+        .dst_y1 = args.dst_y0 + args.dst_height,
+        .src_x0 = static_cast<s32>(args.src_x0 >> 32),
+        .src_y0 = static_cast<s32>(args.src_y0 >> 32),
+        .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
+        .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
     };
-    if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
+    if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
         UNIMPLEMENTED();
     }
 }
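The source coordinates above are 32.32 fixed point: the upper 32 bits hold the integer pixel position, and du_dx/dv_dy are fixed-point steps per destination pixel, so src_x1 falls out of one multiply and a shift. A small worked example of the same arithmetic:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // 1.5 source pixels per destination pixel, in 32.32 fixed point
        const int64_t du_dx = int64_t(1.5 * (1LL << 32));
        const int64_t src_x0 = 0; // source origin, also 32.32
        const int32_t dst_width = 100;

        // Same expression as .src_x1 in Blit():
        const int32_t src_x1 = int32_t((du_dx * dst_width + src_x0) >> 32);
        std::printf("%d source pixels cover %d destination pixels\n", src_x1, dst_width);
        // prints: 150 source pixels cover 100 destination pixels
    }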
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0909709ec..81522988e 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -53,8 +53,8 @@ public:
     };
 
     enum class Filter : u32 {
-        PointSample = 0, // Nearest
-        Linear = 1,
+        Point = 0,
+        Bilinear = 1,
     };
 
     enum class Operation : u32 {
@@ -67,88 +67,235 @@ public:
         BlendPremult = 6,
     };
 
-    struct Regs {
-        static constexpr std::size_t NUM_REGS = 0x258;
+    enum class MemoryLayout : u32 {
+        BlockLinear = 0,
+        Pitch = 1,
+    };
 
-        struct Surface {
-            RenderTargetFormat format;
-            BitField<0, 1, u32> linear;
-            union {
-                BitField<0, 4, u32> block_width;
-                BitField<4, 4, u32> block_height;
-                BitField<8, 4, u32> block_depth;
-            };
-            u32 depth;
-            u32 layer;
-            u32 pitch;
-            u32 width;
-            u32 height;
-            u32 address_high;
-            u32 address_low;
-
-            GPUVAddr Address() const {
-                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                             address_low);
-            }
-
-            u32 BlockWidth() const {
-                return block_width.Value();
-            }
-
-            u32 BlockHeight() const {
-                return block_height.Value();
-            }
-
-            u32 BlockDepth() const {
-                return block_depth.Value();
-            }
-        };
-        static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
+    enum class CpuIndexWrap : u32 {
+        Wrap = 0,
+        NoWrap = 1,
+    };
 
+    struct Surface {
+        RenderTargetFormat format;
+        MemoryLayout linear;
         union {
-            struct {
-                INSERT_UNION_PADDING_WORDS(0x80);
+            BitField<0, 4, u32> block_width;
+            BitField<4, 4, u32> block_height;
+            BitField<8, 4, u32> block_depth;
+        };
+        u32 depth;
+        u32 layer;
+        u32 pitch;
+        u32 width;
+        u32 height;
+        u32 addr_upper;
+        u32 addr_lower;
+
+        [[nodiscard]] constexpr GPUVAddr Address() const noexcept {
+            return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
+        }
+    };
+    static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
 
-                Surface dst;
+    enum class SectorPromotion : u32 {
+        NoPromotion = 0,
+        PromoteTo2V = 1,
+        PromoteTo2H = 2,
+        PromoteTo4 = 3,
+    };
 
-                INSERT_UNION_PADDING_WORDS(2);
+    enum class NumTpcs : u32 {
+        All = 0,
+        One = 1,
+    };
 
-                Surface src;
+    enum class RenderEnableMode : u32 {
+        False = 0,
+        True = 1,
+        Conditional = 2,
+        RenderIfEqual = 3,
+        RenderIfNotEqual = 4,
+    };
 
-                INSERT_UNION_PADDING_WORDS(0x15);
+    enum class ColorKeyFormat : u32 {
+        A16R56G6B5 = 0,
+        A1R5G55B5 = 1,
+        A8R8G8B8 = 2,
+        A2R10G10B10 = 3,
+        Y8 = 4,
+        Y16 = 5,
+        Y32 = 6,
+    };
 
-                Operation operation;
+    union Beta4 {
+        BitField<0, 8, u32> b;
+        BitField<8, 8, u32> g;
+        BitField<16, 8, u32> r;
+        BitField<24, 8, u32> a;
+    };
 
-                INSERT_UNION_PADDING_WORDS(0x177);
+    struct Point {
+        u32 x;
+        u32 y;
+    };
 
+    enum class PatternSelect : u32 {
+        MonoChrome8x8 = 0,
+        MonoChrome64x1 = 1,
+        MonoChrome1x64 = 2,
+        Color = 3,
+    };
+
+    enum class NotifyType : u32 {
+        WriteOnly = 0,
+        WriteThenAwaken = 1,
+    };
+
+    enum class MonochromePatternColorFormat : u32 {
+        A8X8R8G6B5 = 0,
+        A1R5G5B5 = 1,
+        A8R8G8B8 = 2,
+        A8Y8 = 3,
+        A8X8Y16 = 4,
+        Y32 = 5,
+    };
+
+    enum class MonochromePatternFormat : u32 {
+        CGA6_M1 = 0,
+        LE_M1 = 1,
+    };
+
+    union Regs {
+        static constexpr std::size_t NUM_REGS = 0x258;
+        struct {
+            u32 object;
+            INSERT_UNION_PADDING_WORDS(0x3F);
+            u32 no_operation;
+            NotifyType notify;
+            INSERT_UNION_PADDING_WORDS(0x2);
+            u32 wait_for_idle;
+            INSERT_UNION_PADDING_WORDS(0xB);
+            u32 pm_trigger;
+            INSERT_UNION_PADDING_WORDS(0xF);
+            u32 context_dma_notify;
+            u32 dst_context_dma;
+            u32 src_context_dma;
+            u32 semaphore_context_dma;
+            INSERT_UNION_PADDING_WORDS(0x1C);
+            Surface dst;
+            CpuIndexWrap pixels_from_cpu_index_wrap;
+            u32 kind2d_check_enable;
+            Surface src;
+            SectorPromotion pixels_from_memory_sector_promotion;
+            INSERT_UNION_PADDING_WORDS(0x1);
+            NumTpcs num_tpcs;
+            u32 render_enable_addr_upper;
+            u32 render_enable_addr_lower;
+            RenderEnableMode render_enable_mode;
+            INSERT_UNION_PADDING_WORDS(0x4);
+            u32 clip_x0;
+            u32 clip_y0;
+            u32 clip_width;
+            u32 clip_height;
+            BitField<0, 1, u32> clip_enable;
+            BitField<0, 3, ColorKeyFormat> color_key_format;
+            u32 color_key;
+            BitField<0, 1, u32> color_key_enable;
+            BitField<0, 8, u32> rop;
+            u32 beta1;
+            Beta4 beta4;
+            Operation operation;
+            union {
+                BitField<0, 6, u32> x;
+                BitField<8, 6, u32> y;
+            } pattern_offset;
+            BitField<0, 2, PatternSelect> pattern_select;
+            INSERT_UNION_PADDING_WORDS(0xC);
+            struct {
+                BitField<0, 3, MonochromePatternColorFormat> color_format;
+                BitField<0, 1, MonochromePatternFormat> format;
+                u32 color0;
+                u32 color1;
+                u32 pattern0;
+                u32 pattern1;
+            } monochrome_pattern;
+            struct {
+                std::array<u32, 0x40> X8R8G8B8;
+                std::array<u32, 0x20> R5G6B5;
+                std::array<u32, 0x20> X1R5G5B5;
+                std::array<u32, 0x10> Y8;
+            } color_pattern;
+            INSERT_UNION_PADDING_WORDS(0x10);
+            struct {
+                u32 prim_mode;
+                u32 prim_color_format;
+                u32 prim_color;
+                u32 line_tie_break_bits;
+                INSERT_UNION_PADDING_WORDS(0x14);
+                u32 prim_point_xy;
+                INSERT_UNION_PADDING_WORDS(0x7);
+                std::array<Point, 0x40> prim_point;
+            } render_solid;
+            struct {
+                u32 data_type;
+                u32 color_format;
+                u32 index_format;
+                u32 mono_format;
+                u32 wrap;
+                u32 color0;
+                u32 color1;
+                u32 mono_opacity;
+                INSERT_UNION_PADDING_WORDS(0x6);
+                u32 src_width;
+                u32 src_height;
+                u32 dx_du_frac;
+                u32 dx_du_int;
+                u32 dx_dv_frac;
+                u32 dy_dv_int;
+                u32 dst_x0_frac;
+                u32 dst_x0_int;
+                u32 dst_y0_frac;
+                u32 dst_y0_int;
+                u32 data;
+            } pixels_from_cpu;
+            INSERT_UNION_PADDING_WORDS(0x3);
+            u32 big_endian_control;
+            INSERT_UNION_PADDING_WORDS(0x3);
+            struct {
+                BitField<0, 3, u32> block_shape;
+                BitField<0, 5, u32> corral_size;
+                BitField<0, 1, u32> safe_overlap;
                 union {
-                    u32 raw;
                     BitField<0, 1, Origin> origin;
                     BitField<4, 1, Filter> filter;
-                } blit_control;
-
+                } sample_mode;
                 INSERT_UNION_PADDING_WORDS(0x8);
-
-                u32 blit_dst_x;
-                u32 blit_dst_y;
-                u32 blit_dst_width;
-                u32 blit_dst_height;
-                u64 blit_du_dx;
-                u64 blit_dv_dy;
-                u64 blit_src_x;
-                u64 blit_src_y;
-
-                INSERT_UNION_PADDING_WORDS(0x21);
-            };
-            std::array<u32, NUM_REGS> reg_array;
+                s32 dst_x0;
+                s32 dst_y0;
+                s32 dst_width;
+                s32 dst_height;
+                s64 du_dx;
+                s64 dv_dy;
+                s64 src_x0;
+                s64 src_y0;
+            } pixels_from_memory;
         };
+        std::array<u32, NUM_REGS> reg_array;
     } regs{};
 
     struct Config {
-        Operation operation{};
-        Filter filter{};
-        Common::Rectangle<u32> src_rect;
-        Common::Rectangle<u32> dst_rect;
+        Operation operation;
+        Filter filter;
+        s32 dst_x0;
+        s32 dst_y0;
+        s32 dst_x1;
+        s32 dst_y1;
+        s32 src_x0;
+        s32 src_y0;
+        s32 src_x1;
+        s32 src_y1;
     };
 
@@ -156,25 +303,49 @@ private:
 
     /// Performs the copy from the source surface to the destination surface as configured in the
     /// registers.
-    void HandleSurfaceCopy();
+    void Blit();
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                 \
-    static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4,                            \
+    static_assert(offsetof(Fermi2D::Regs, field_name) == position,                                \
                   "Field " #field_name " has invalid position")
 
-ASSERT_REG_POSITION(dst, 0x80);
-ASSERT_REG_POSITION(src, 0x8C);
-ASSERT_REG_POSITION(operation, 0xAB);
-ASSERT_REG_POSITION(blit_control, 0x223);
-ASSERT_REG_POSITION(blit_dst_x, 0x22c);
-ASSERT_REG_POSITION(blit_dst_y, 0x22d);
-ASSERT_REG_POSITION(blit_dst_width, 0x22e);
-ASSERT_REG_POSITION(blit_dst_height, 0x22f);
-ASSERT_REG_POSITION(blit_du_dx, 0x230);
-ASSERT_REG_POSITION(blit_dv_dy, 0x232);
-ASSERT_REG_POSITION(blit_src_x, 0x234);
-ASSERT_REG_POSITION(blit_src_y, 0x236);
+ASSERT_REG_POSITION(object, 0x0);
+ASSERT_REG_POSITION(no_operation, 0x100);
+ASSERT_REG_POSITION(notify, 0x104);
+ASSERT_REG_POSITION(wait_for_idle, 0x110);
+ASSERT_REG_POSITION(pm_trigger, 0x140);
+ASSERT_REG_POSITION(context_dma_notify, 0x180);
+ASSERT_REG_POSITION(dst_context_dma, 0x184);
+ASSERT_REG_POSITION(src_context_dma, 0x188);
+ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
+ASSERT_REG_POSITION(dst, 0x200);
+ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
+ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
+ASSERT_REG_POSITION(src, 0x230);
+ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
+ASSERT_REG_POSITION(num_tpcs, 0x260);
+ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
+ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
+ASSERT_REG_POSITION(clip_x0, 0x280);
+ASSERT_REG_POSITION(clip_y0, 0x284);
+ASSERT_REG_POSITION(clip_width, 0x288);
+ASSERT_REG_POSITION(clip_height, 0x28c);
+ASSERT_REG_POSITION(clip_enable, 0x290);
+ASSERT_REG_POSITION(color_key_format, 0x294);
+ASSERT_REG_POSITION(color_key, 0x298);
+ASSERT_REG_POSITION(rop, 0x2A0);
+ASSERT_REG_POSITION(beta1, 0x2A4);
+ASSERT_REG_POSITION(beta4, 0x2A8);
+ASSERT_REG_POSITION(operation, 0x2AC);
+ASSERT_REG_POSITION(pattern_offset, 0x2B0);
+ASSERT_REG_POSITION(pattern_select, 0x2B4);
+ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
+ASSERT_REG_POSITION(color_pattern, 0x300);
+ASSERT_REG_POSITION(render_solid, 0x580);
+ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
+ASSERT_REG_POSITION(big_endian_control, 0x870);
+ASSERT_REG_POSITION(pixels_from_memory, 0x880);
 
 #undef ASSERT_REG_POSITION
 
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 898370739..ba387506e 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun | |||
| 58 | } | 58 | } |
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { | ||
| 62 | const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); | ||
| 63 | ASSERT(cbuf_mask[regs.tex_cb_index]); | ||
| 64 | |||
| 65 | const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index]; | ||
| 66 | ASSERT(texinfo.Address() != 0); | ||
| 67 | |||
| 68 | const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle); | ||
| 69 | ASSERT(address < texinfo.Address() + texinfo.size); | ||
| 70 | |||
| 71 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)}; | ||
| 72 | return GetTextureInfo(tex_handle); | ||
| 73 | } | ||
| 74 | |||
| 75 | Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const { | ||
| 76 | return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; | ||
| 77 | } | ||
| 78 | |||
| 79 | u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { | 61 | u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { |
| 80 | ASSERT(stage == ShaderType::Compute); | 62 | ASSERT(stage == ShaderType::Compute); |
| 81 | const auto& buffer = launch_description.const_buffer_config[const_buffer]; | 63 | const auto& buffer = launch_description.const_buffer_config[const_buffer]; |
| @@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con | |||
| 98 | 80 | ||
| 99 | SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { | 81 | SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { |
| 100 | const Texture::TextureHandle tex_handle{handle}; | 82 | const Texture::TextureHandle tex_handle{handle}; |
| 101 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); | 83 | const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); |
| 102 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); | 84 | const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); |
| 103 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | 85 | |
| 86 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); | ||
| 87 | result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); | ||
| 104 | return result; | 88 | return result; |
| 105 | } | 89 | } |
| 106 | 90 | ||
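AccessSampler no longer goes through the removed Texture::FullTextureInfo aggregate and instead fetches the TIC and TSC entries directly. For reference, a reconstruction of the removed aggregate from its two uses above (tex_info.tic and tex_info.tsc); the actual definition is not part of this diff:

    struct FullTextureInfo {
        Texture::TICEntry tic;
        Texture::TSCEntry tsc;
    };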
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7f2500aab..51a041202 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -209,11 +209,6 @@ public: | |||
| 209 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | 209 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 210 | u32 methods_pending) override; | 210 | u32 methods_pending) override; |
| 211 | 211 | ||
| 212 | Texture::FullTextureInfo GetTexture(std::size_t offset) const; | ||
| 213 | |||
| 214 | /// Given a texture handle, returns the TSC and TIC entries. | ||
| 215 | Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; | ||
| 216 | |||
| 217 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; | 212 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; |
| 218 | 213 | ||
| 219 | SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; | 214 | SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 761962ed0..9be651e24 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -2,7 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cinttypes> | ||
| 6 | #include <cstring> | 5 | #include <cstring> |
| 7 | #include <optional> | 6 | #include <optional> |
| 8 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| @@ -227,6 +226,10 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume | |||
| 227 | OnMemoryWrite(); | 226 | OnMemoryWrite(); |
| 228 | } | 227 | } |
| 229 | return; | 228 | return; |
| 229 | case MAXWELL3D_REG_INDEX(fragment_barrier): | ||
| 230 | return rasterizer->FragmentBarrier(); | ||
| 231 | case MAXWELL3D_REG_INDEX(tiled_cache_barrier): | ||
| 232 | return rasterizer->TiledCacheBarrier(); | ||
| 230 | } | 233 | } |
| 231 | } | 234 | } |
| 232 | 235 | ||
| @@ -639,7 +642,7 @@ void Maxwell3D::FinishCBData() { | |||
| 639 | } | 642 | } |
| 640 | 643 | ||
| 641 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | 644 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { |
| 642 | const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; | 645 | const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)}; |
| 643 | 646 | ||
| 644 | Texture::TICEntry tic_entry; | 647 | Texture::TICEntry tic_entry; |
| 645 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); | 648 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); |
| @@ -648,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | |||
| 648 | } | 651 | } |
| 649 | 652 | ||
| 650 | Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { | 653 | Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { |
| 651 | const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; | 654 | const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)}; |
| 652 | 655 | ||
| 653 | Texture::TSCEntry tsc_entry; | 656 | Texture::TSCEntry tsc_entry; |
| 654 | memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); | 657 | memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); |
| 655 | return tsc_entry; | 658 | return tsc_entry; |
| 656 | } | 659 | } |
| 657 | 660 | ||
| 658 | Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const { | ||
| 659 | return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; | ||
| 660 | } | ||
| 661 | |||
| 662 | Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const { | ||
| 663 | const auto stage_index = static_cast<std::size_t>(stage); | ||
| 664 | const auto& shader = state.shader_stages[stage_index]; | ||
| 665 | const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; | ||
| 666 | ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); | ||
| 667 | |||
| 668 | const GPUVAddr tex_info_address = | ||
| 669 | tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); | ||
| 670 | |||
| 671 | ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); | ||
| 672 | |||
| 673 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | ||
| 674 | |||
| 675 | return GetTextureInfo(tex_handle); | ||
| 676 | } | ||
| 677 | |||
| 678 | u32 Maxwell3D::GetRegisterValue(u32 method) const { | 661 | u32 Maxwell3D::GetRegisterValue(u32 method) const { |
| 679 | ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); | 662 | ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); |
| 680 | return regs.reg_array[method]; | 663 | return regs.reg_array[method]; |
| 681 | } | 664 | } |
| 682 | 665 | ||
| 683 | void Maxwell3D::ProcessClearBuffers() { | 666 | void Maxwell3D::ProcessClearBuffers() { |
| 684 | ASSERT(regs.clear_buffers.R == regs.clear_buffers.G && | ||
| 685 | regs.clear_buffers.R == regs.clear_buffers.B && | ||
| 686 | regs.clear_buffers.R == regs.clear_buffers.A); | ||
| 687 | |||
| 688 | rasterizer->Clear(); | 667 | rasterizer->Clear(); |
| 689 | } | 668 | } |
| 690 | 669 | ||
| @@ -692,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse | |||
| 692 | ASSERT(stage != ShaderType::Compute); | 671 | ASSERT(stage != ShaderType::Compute); |
| 693 | const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; | 672 | const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; |
| 694 | const auto& buffer = shader_stage.const_buffers[const_buffer]; | 673 | const auto& buffer = shader_stage.const_buffers[const_buffer]; |
| 695 | u32 result; | 674 | return memory_manager.Read<u32>(buffer.address + offset); |
| 696 | std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32)); | ||
| 697 | return result; | ||
| 698 | } | 675 | } |
| 699 | 676 | ||
| 700 | SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { | 677 | SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { |
| @@ -712,9 +689,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b | |||
| 712 | 689 | ||
| 713 | SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { | 690 | SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { |
| 714 | const Texture::TextureHandle tex_handle{handle}; | 691 | const Texture::TextureHandle tex_handle{handle}; |
| 715 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); | 692 | const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); |
| 716 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); | 693 | const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); |
| 717 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | 694 | |
| 695 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); | ||
| 696 | result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); | ||
| 718 | return result; | 697 | return result; |
| 719 | } | 698 | } |
| 720 | 699 | ||
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 564acbc53..bf9e07c9b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -438,16 +438,6 @@ public: | |||
| 438 | DecrWrapOGL = 0x8508, | 438 | DecrWrapOGL = 0x8508, |
| 439 | }; | 439 | }; |
| 440 | 440 | ||
| 441 | enum class MemoryLayout : u32 { | ||
| 442 | Linear = 0, | ||
| 443 | BlockLinear = 1, | ||
| 444 | }; | ||
| 445 | |||
| 446 | enum class InvMemoryLayout : u32 { | ||
| 447 | BlockLinear = 0, | ||
| 448 | Linear = 1, | ||
| 449 | }; | ||
| 450 | |||
| 451 | enum class CounterReset : u32 { | 441 | enum class CounterReset : u32 { |
| 452 | SampleCnt = 0x01, | 442 | SampleCnt = 0x01, |
| 453 | Unk02 = 0x02, | 443 | Unk02 = 0x02, |
| @@ -589,21 +579,31 @@ public: | |||
| 589 | NegativeW = 7, | 579 | NegativeW = 7, |
| 590 | }; | 580 | }; |
| 591 | 581 | ||
| 582 | enum class SamplerIndex : u32 { | ||
| 583 | Independently = 0, | ||
| 584 | ViaHeaderIndex = 1, | ||
| 585 | }; | ||
| 586 | |||
| 587 | struct TileMode { | ||
| 588 | union { | ||
| 589 | BitField<0, 4, u32> block_width; | ||
| 590 | BitField<4, 4, u32> block_height; | ||
| 591 | BitField<8, 4, u32> block_depth; | ||
| 592 | BitField<12, 1, u32> is_pitch_linear; | ||
| 593 | BitField<16, 1, u32> is_3d; | ||
| 594 | }; | ||
| 595 | }; | ||
| 596 | static_assert(sizeof(TileMode) == 4); | ||
| 597 | |||
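TileMode replaces the per-target memory_layout unions and widens each block dimension to four bits. The fields are assumed to be log2 GOB counts, consistent with how the DMA engine checks block_size later in this diff, so decoding is a shift:

    // Sketch: each TileMode dimension is assumed to be a log2 GOB count.
    u32 BlockHeightInGobs(u32 block_height_log2) {
        return 1U << block_height_log2; // e.g. a stored 4 means 16 GOBs
    }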
| 592 | struct RenderTargetConfig { | 598 | struct RenderTargetConfig { |
| 593 | u32 address_high; | 599 | u32 address_high; |
| 594 | u32 address_low; | 600 | u32 address_low; |
| 595 | u32 width; | 601 | u32 width; |
| 596 | u32 height; | 602 | u32 height; |
| 597 | Tegra::RenderTargetFormat format; | 603 | Tegra::RenderTargetFormat format; |
| 604 | TileMode tile_mode; | ||
| 598 | union { | 605 | union { |
| 599 | BitField<0, 3, u32> block_width; | 606 | BitField<0, 16, u32> depth; |
| 600 | BitField<4, 3, u32> block_height; | ||
| 601 | BitField<8, 3, u32> block_depth; | ||
| 602 | BitField<12, 1, InvMemoryLayout> type; | ||
| 603 | BitField<16, 1, u32> is_3d; | ||
| 604 | } memory_layout; | ||
| 605 | union { | ||
| 606 | BitField<0, 16, u32> layers; | ||
| 607 | BitField<16, 1, u32> volume; | 607 | BitField<16, 1, u32> volume; |
| 608 | }; | 608 | }; |
| 609 | u32 layer_stride; | 609 | u32 layer_stride; |
| @@ -832,7 +832,11 @@ public: | |||
| 832 | 832 | ||
| 833 | u32 patch_vertices; | 833 | u32 patch_vertices; |
| 834 | 834 | ||
| 835 | INSERT_UNION_PADDING_WORDS(0xC); | 835 | INSERT_UNION_PADDING_WORDS(0x4); |
| 836 | |||
| 837 | u32 fragment_barrier; | ||
| 838 | |||
| 839 | INSERT_UNION_PADDING_WORDS(0x7); | ||
| 836 | 840 | ||
| 837 | std::array<ScissorTest, NumViewports> scissor_test; | 841 | std::array<ScissorTest, NumViewports> scissor_test; |
| 838 | 842 | ||
| @@ -842,7 +846,15 @@ public: | |||
| 842 | u32 stencil_back_mask; | 846 | u32 stencil_back_mask; |
| 843 | u32 stencil_back_func_mask; | 847 | u32 stencil_back_func_mask; |
| 844 | 848 | ||
| 845 | INSERT_UNION_PADDING_WORDS(0xC); | 849 | INSERT_UNION_PADDING_WORDS(0x5); |
| 850 | |||
| 851 | u32 invalidate_texture_data_cache; | ||
| 852 | |||
| 853 | INSERT_UNION_PADDING_WORDS(0x1); | ||
| 854 | |||
| 855 | u32 tiled_cache_barrier; | ||
| 856 | |||
| 857 | INSERT_UNION_PADDING_WORDS(0x4); | ||
| 846 | 858 | ||
| 847 | u32 color_mask_common; | 859 | u32 color_mask_common; |
| 848 | 860 | ||
| @@ -866,12 +878,7 @@ public: | |||
| 866 | u32 address_high; | 878 | u32 address_high; |
| 867 | u32 address_low; | 879 | u32 address_low; |
| 868 | Tegra::DepthFormat format; | 880 | Tegra::DepthFormat format; |
| 869 | union { | 881 | TileMode tile_mode; |
| 870 | BitField<0, 4, u32> block_width; | ||
| 871 | BitField<4, 4, u32> block_height; | ||
| 872 | BitField<8, 4, u32> block_depth; | ||
| 873 | BitField<20, 1, InvMemoryLayout> type; | ||
| 874 | } memory_layout; | ||
| 875 | u32 layer_stride; | 882 | u32 layer_stride; |
| 876 | 883 | ||
| 877 | GPUVAddr Address() const { | 884 | GPUVAddr Address() const { |
| @@ -880,7 +887,18 @@ public: | |||
| 880 | } | 887 | } |
| 881 | } zeta; | 888 | } zeta; |
| 882 | 889 | ||
| 883 | INSERT_UNION_PADDING_WORDS(0x41); | 890 | struct { |
| 891 | union { | ||
| 892 | BitField<0, 16, u32> x; | ||
| 893 | BitField<16, 16, u32> width; | ||
| 894 | }; | ||
| 895 | union { | ||
| 896 | BitField<0, 16, u32> y; | ||
| 897 | BitField<16, 16, u32> height; | ||
| 898 | }; | ||
| 899 | } render_area; | ||
| 900 | |||
| 901 | INSERT_UNION_PADDING_WORDS(0x3F); | ||
| 884 | 902 | ||
| 885 | union { | 903 | union { |
| 886 | BitField<0, 4, u32> stencil; | 904 | BitField<0, 4, u32> stencil; |
| @@ -921,7 +939,7 @@ public: | |||
| 921 | BitField<25, 3, u32> map_7; | 939 | BitField<25, 3, u32> map_7; |
| 922 | }; | 940 | }; |
| 923 | 941 | ||
| 924 | u32 GetMap(std::size_t index) const { | 942 | u32 Map(std::size_t index) const { |
| 925 | const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3, | 943 | const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3, |
| 926 | map_4, map_5, map_6, map_7}; | 944 | map_4, map_5, map_6, map_7}; |
| 927 | ASSERT(index < maps.size()); | 945 | ASSERT(index < maps.size()); |
| @@ -934,11 +952,13 @@ public: | |||
| 934 | u32 zeta_width; | 952 | u32 zeta_width; |
| 935 | u32 zeta_height; | 953 | u32 zeta_height; |
| 936 | union { | 954 | union { |
| 937 | BitField<0, 16, u32> zeta_layers; | 955 | BitField<0, 16, u32> zeta_depth; |
| 938 | BitField<16, 1, u32> zeta_volume; | 956 | BitField<16, 1, u32> zeta_volume; |
| 939 | }; | 957 | }; |
| 940 | 958 | ||
| 941 | INSERT_UNION_PADDING_WORDS(0x26); | 959 | SamplerIndex sampler_index; |
| 960 | |||
| 961 | INSERT_UNION_PADDING_WORDS(0x25); | ||
| 942 | 962 | ||
| 943 | u32 depth_test_enable; | 963 | u32 depth_test_enable; |
| 944 | 964 | ||
| @@ -964,6 +984,7 @@ public: | |||
| 964 | float b; | 984 | float b; |
| 965 | float a; | 985 | float a; |
| 966 | } blend_color; | 986 | } blend_color; |
| 987 | |||
| 967 | INSERT_UNION_PADDING_WORDS(0x4); | 988 | INSERT_UNION_PADDING_WORDS(0x4); |
| 968 | 989 | ||
| 969 | struct { | 990 | struct { |
| @@ -1001,7 +1022,12 @@ public: | |||
| 1001 | float line_width_smooth; | 1022 | float line_width_smooth; |
| 1002 | float line_width_aliased; | 1023 | float line_width_aliased; |
| 1003 | 1024 | ||
| 1004 | INSERT_UNION_PADDING_WORDS(0x1F); | 1025 | INSERT_UNION_PADDING_WORDS(0x1B); |
| 1026 | |||
| 1027 | u32 invalidate_sampler_cache_no_wfi; | ||
| 1028 | u32 invalidate_texture_header_cache_no_wfi; | ||
| 1029 | |||
| 1030 | INSERT_UNION_PADDING_WORDS(0x2); | ||
| 1005 | 1031 | ||
| 1006 | u32 vb_element_base; | 1032 | u32 vb_element_base; |
| 1007 | u32 vb_base_instance; | 1033 | u32 vb_base_instance; |
| @@ -1045,13 +1071,13 @@ public: | |||
| 1045 | } condition; | 1071 | } condition; |
| 1046 | 1072 | ||
| 1047 | struct { | 1073 | struct { |
| 1048 | u32 tsc_address_high; | 1074 | u32 address_high; |
| 1049 | u32 tsc_address_low; | 1075 | u32 address_low; |
| 1050 | u32 tsc_limit; | 1076 | u32 limit; |
| 1051 | 1077 | ||
| 1052 | GPUVAddr TSCAddress() const { | 1078 | GPUVAddr Address() const { |
| 1053 | return static_cast<GPUVAddr>( | 1079 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | |
| 1054 | (static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low); | 1080 | address_low); |
| 1055 | } | 1081 | } |
| 1056 | } tsc; | 1082 | } tsc; |
| 1057 | 1083 | ||
| @@ -1062,13 +1088,13 @@ public: | |||
| 1062 | u32 line_smooth_enable; | 1088 | u32 line_smooth_enable; |
| 1063 | 1089 | ||
| 1064 | struct { | 1090 | struct { |
| 1065 | u32 tic_address_high; | 1091 | u32 address_high; |
| 1066 | u32 tic_address_low; | 1092 | u32 address_low; |
| 1067 | u32 tic_limit; | 1093 | u32 limit; |
| 1068 | 1094 | ||
| 1069 | GPUVAddr TICAddress() const { | 1095 | GPUVAddr Address() const { |
| 1070 | return static_cast<GPUVAddr>( | 1096 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | |
| 1071 | (static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low); | 1097 | address_low); |
| 1072 | } | 1098 | } |
| 1073 | } tic; | 1099 | } tic; |
| 1074 | 1100 | ||
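Both tsc and tic now use the same generic Address() spelling for the split-register pattern, dropping the redundant TSC/TIC prefixes:

    // The composition shared by both Address() helpers above.
    GPUVAddr Compose(u32 address_high, u32 address_low) {
        return (static_cast<GPUVAddr>(address_high) << 32) | address_low;
    }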
| @@ -1397,12 +1423,6 @@ public: | |||
| 1397 | 1423 | ||
| 1398 | void FlushMMEInlineDraw(); | 1424 | void FlushMMEInlineDraw(); |
| 1399 | 1425 | ||
| 1400 | /// Given a texture handle, returns the TSC and TIC entries. | ||
| 1401 | Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; | ||
| 1402 | |||
| 1403 | /// Returns the texture information for a specific texture in a specific shader stage. | ||
| 1404 | Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const; | ||
| 1405 | |||
| 1406 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; | 1426 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; |
| 1407 | 1427 | ||
| 1408 | SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; | 1428 | SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; |
| @@ -1598,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370); | |||
| 1598 | ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); | 1618 | ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); |
| 1599 | ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); | 1619 | ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); |
| 1600 | ASSERT_REG_POSITION(patch_vertices, 0x373); | 1620 | ASSERT_REG_POSITION(patch_vertices, 0x373); |
| 1621 | ASSERT_REG_POSITION(fragment_barrier, 0x378); | ||
| 1601 | ASSERT_REG_POSITION(scissor_test, 0x380); | 1622 | ASSERT_REG_POSITION(scissor_test, 0x380); |
| 1602 | ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); | 1623 | ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); |
| 1603 | ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); | 1624 | ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); |
| 1604 | ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); | 1625 | ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); |
| 1626 | ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD); | ||
| 1627 | ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF); | ||
| 1605 | ASSERT_REG_POSITION(color_mask_common, 0x3E4); | 1628 | ASSERT_REG_POSITION(color_mask_common, 0x3E4); |
| 1606 | ASSERT_REG_POSITION(depth_bounds, 0x3E7); | 1629 | ASSERT_REG_POSITION(depth_bounds, 0x3E7); |
| 1607 | ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); | 1630 | ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); |
| @@ -1609,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED); | |||
| 1609 | ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); | 1632 | ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); |
| 1610 | ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); | 1633 | ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); |
| 1611 | ASSERT_REG_POSITION(zeta, 0x3F8); | 1634 | ASSERT_REG_POSITION(zeta, 0x3F8); |
| 1635 | ASSERT_REG_POSITION(render_area, 0x3FD); | ||
| 1612 | ASSERT_REG_POSITION(clear_flags, 0x43E); | 1636 | ASSERT_REG_POSITION(clear_flags, 0x43E); |
| 1613 | ASSERT_REG_POSITION(fill_rectangle, 0x44F); | 1637 | ASSERT_REG_POSITION(fill_rectangle, 0x44F); |
| 1614 | ASSERT_REG_POSITION(vertex_attrib_format, 0x458); | 1638 | ASSERT_REG_POSITION(vertex_attrib_format, 0x458); |
| @@ -1617,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); | |||
| 1617 | ASSERT_REG_POSITION(rt_control, 0x487); | 1641 | ASSERT_REG_POSITION(rt_control, 0x487); |
| 1618 | ASSERT_REG_POSITION(zeta_width, 0x48a); | 1642 | ASSERT_REG_POSITION(zeta_width, 0x48a); |
| 1619 | ASSERT_REG_POSITION(zeta_height, 0x48b); | 1643 | ASSERT_REG_POSITION(zeta_height, 0x48b); |
| 1620 | ASSERT_REG_POSITION(zeta_layers, 0x48c); | 1644 | ASSERT_REG_POSITION(zeta_depth, 0x48c); |
| 1645 | ASSERT_REG_POSITION(sampler_index, 0x48D); | ||
| 1621 | ASSERT_REG_POSITION(depth_test_enable, 0x4B3); | 1646 | ASSERT_REG_POSITION(depth_test_enable, 0x4B3); |
| 1622 | ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); | 1647 | ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); |
| 1623 | ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); | 1648 | ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); |
| @@ -1641,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA); | |||
| 1641 | ASSERT_REG_POSITION(screen_y_control, 0x4EB); | 1666 | ASSERT_REG_POSITION(screen_y_control, 0x4EB); |
| 1642 | ASSERT_REG_POSITION(line_width_smooth, 0x4EC); | 1667 | ASSERT_REG_POSITION(line_width_smooth, 0x4EC); |
| 1643 | ASSERT_REG_POSITION(line_width_aliased, 0x4ED); | 1668 | ASSERT_REG_POSITION(line_width_aliased, 0x4ED); |
| 1669 | ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509); | ||
| 1670 | ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A); | ||
| 1644 | ASSERT_REG_POSITION(vb_element_base, 0x50D); | 1671 | ASSERT_REG_POSITION(vb_element_base, 0x50D); |
| 1645 | ASSERT_REG_POSITION(vb_base_instance, 0x50E); | 1672 | ASSERT_REG_POSITION(vb_base_instance, 0x50E); |
| 1646 | ASSERT_REG_POSITION(clip_distance_enabled, 0x544); | 1673 | ASSERT_REG_POSITION(clip_distance_enabled, 0x544); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 1c29e895e..ba750748c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -96,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() { | |||
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | void MaxwellDMA::CopyBlockLinearToPitch() { | 98 | void MaxwellDMA::CopyBlockLinearToPitch() { |
| 99 | UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); | ||
| 99 | UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); | 100 | UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); |
| 100 | UNIMPLEMENTED_IF(regs.src_params.layer != 0); | 101 | UNIMPLEMENTED_IF(regs.src_params.layer != 0); |
| 101 | 102 | ||
| @@ -135,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 135 | } | 136 | } |
| 136 | 137 | ||
| 137 | void MaxwellDMA::CopyPitchToBlockLinear() { | 138 | void MaxwellDMA::CopyPitchToBlockLinear() { |
| 139 | UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); | ||
| 140 | |||
| 138 | const auto& dst_params = regs.dst_params; | 141 | const auto& dst_params = regs.dst_params; |
| 139 | const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; | 142 | const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; |
| 140 | const u32 width = dst_params.width; | 143 | const u32 width = dst_params.width; |
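The block_size fields are log2 GOB counts, which is why "not one" is spelled as != 0 in the new checks; width == 0 means a block exactly one GOB wide:

    // Assumed encoding, consistent with the checks above: log2 GOB counts.
    bool IsSingleGobWide(u32 block_width_log2) {
        return block_width_log2 == 0; // 1 << 0 == 1 GOB
    }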
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index c5f26896e..3512283ff 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "core/core.h" | 11 | #include "core/core.h" |
| 12 | #include "video_core/delayed_destruction_ring.h" | ||
| 12 | #include "video_core/gpu.h" | 13 | #include "video_core/gpu.h" |
| 13 | #include "video_core/memory_manager.h" | 14 | #include "video_core/memory_manager.h" |
| 14 | #include "video_core/rasterizer_interface.h" | 15 | #include "video_core/rasterizer_interface.h" |
| @@ -47,6 +48,11 @@ protected: | |||
| 47 | template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache> | 48 | template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache> |
| 48 | class FenceManager { | 49 | class FenceManager { |
| 49 | public: | 50 | public: |
| 51 | /// Notify the fence manager about a new frame | ||
| 52 | void TickFrame() { | ||
| 53 | delayed_destruction_ring.Tick(); | ||
| 54 | } | ||
| 55 | |||
| 50 | void SignalSemaphore(GPUVAddr addr, u32 value) { | 56 | void SignalSemaphore(GPUVAddr addr, u32 value) { |
| 51 | TryReleasePendingFences(); | 57 | TryReleasePendingFences(); |
| 52 | const bool should_flush = ShouldFlush(); | 58 | const bool should_flush = ShouldFlush(); |
| @@ -86,7 +92,7 @@ public: | |||
| 86 | } else { | 92 | } else { |
| 87 | gpu.IncrementSyncPoint(current_fence->GetPayload()); | 93 | gpu.IncrementSyncPoint(current_fence->GetPayload()); |
| 88 | } | 94 | } |
| 89 | fences.pop(); | 95 | PopFence(); |
| 90 | } | 96 | } |
| 91 | } | 97 | } |
| 92 | 98 | ||
| @@ -132,7 +138,7 @@ private: | |||
| 132 | } else { | 138 | } else { |
| 133 | gpu.IncrementSyncPoint(current_fence->GetPayload()); | 139 | gpu.IncrementSyncPoint(current_fence->GetPayload()); |
| 134 | } | 140 | } |
| 135 | fences.pop(); | 141 | PopFence(); |
| 136 | } | 142 | } |
| 137 | } | 143 | } |
| 138 | 144 | ||
| @@ -158,7 +164,14 @@ private: | |||
| 158 | query_cache.CommitAsyncFlushes(); | 164 | query_cache.CommitAsyncFlushes(); |
| 159 | } | 165 | } |
| 160 | 166 | ||
| 167 | void PopFence() { | ||
| 168 | delayed_destruction_ring.Push(std::move(fences.front())); | ||
| 169 | fences.pop(); | ||
| 170 | } | ||
| 171 | |||
| 161 | std::queue<TFence> fences; | 172 | std::queue<TFence> fences; |
| 173 | |||
| 174 | DelayedDestructionRing<TFence, 6> delayed_destruction_ring; | ||
| 162 | }; | 175 | }; |
| 163 | 176 | ||
| 164 | } // namespace VideoCommon | 177 | } // namespace VideoCommon |
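PopFence now parks released fences in a ring instead of destroying them immediately, and TickFrame advances the ring once per frame, so a popped fence survives six further frames before its resources are freed. A minimal sketch of what such a ring can look like, assuming one slot per tick (the real type lives in video_core/delayed_destruction_ring.h, which is not shown in this diff):

    #include <array>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Objects pushed into the current slot are destroyed when the ring wraps
    // back around, TICKS frames later.
    template <typename T, std::size_t TICKS>
    class DelayedDestructionRing {
    public:
        void Tick() {
            index = (index + 1) % TICKS;
            elements[index].clear(); // frees objects pushed TICKS frames ago
        }
        void Push(T&& object) {
            elements[index].push_back(std::move(object));
        }
    private:
        std::size_t index = 0;
        std::array<std::vector<T>, TICKS> elements;
    };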
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index c157724a9..4c7399d5a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -1,8 +1,26 @@ | |||
| 1 | set(SHADER_SOURCES | 1 | set(SHADER_FILES |
| 2 | block_linear_unswizzle_2d.comp | ||
| 3 | block_linear_unswizzle_3d.comp | ||
| 4 | convert_depth_to_float.frag | ||
| 5 | convert_float_to_depth.frag | ||
| 6 | full_screen_triangle.vert | ||
| 7 | opengl_copy_bc4.comp | ||
| 2 | opengl_present.frag | 8 | opengl_present.frag |
| 3 | opengl_present.vert | 9 | opengl_present.vert |
| 10 | pitch_unswizzle.comp | ||
| 11 | vulkan_blit_color_float.frag | ||
| 12 | vulkan_blit_depth_stencil.frag | ||
| 13 | vulkan_present.frag | ||
| 14 | vulkan_present.vert | ||
| 15 | vulkan_quad_array.comp | ||
| 16 | vulkan_quad_indexed.comp | ||
| 17 | vulkan_uint8.comp | ||
| 4 | ) | 18 | ) |
| 5 | 19 | ||
| 20 | find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) | ||
| 21 | |||
| 22 | set(GLSL_FLAGS "") | ||
| 23 | |||
| 6 | set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) | 24 | set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) |
| 7 | set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders) | 25 | set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders) |
| 8 | set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) | 26 | set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) |
| @@ -10,27 +28,44 @@ set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) | |||
| 10 | set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in) | 28 | set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in) |
| 11 | set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) | 29 | set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) |
| 12 | 30 | ||
| 13 | foreach(FILENAME IN ITEMS ${SHADER_SOURCES}) | 31 | foreach(FILENAME IN ITEMS ${SHADER_FILES}) |
| 14 | string(REPLACE "." "_" SHADER_NAME ${FILENAME}) | 32 | string(REPLACE "." "_" SHADER_NAME ${FILENAME}) |
| 15 | set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) | 33 | set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) |
| 16 | set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) | 34 | # Skip generating source headers for Vulkan-exclusive files |
| 17 | add_custom_command( | 35 | if (NOT ${FILENAME} MATCHES "vulkan.*") |
| 18 | OUTPUT | 36 | set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) |
| 19 | ${HEADER_FILE} | 37 | add_custom_command( |
| 20 | COMMAND | 38 | OUTPUT |
| 21 | ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE} | 39 | ${SOURCE_HEADER_FILE} |
| 22 | MAIN_DEPENDENCY | 40 | COMMAND |
| 23 | ${SOURCE_FILE} | 41 | ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} |
| 24 | DEPENDS | 42 | MAIN_DEPENDENCY |
| 25 | ${INPUT_FILE} | 43 | ${SOURCE_FILE} |
| 26 | # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified | 44 | DEPENDS |
| 27 | ) | 45 | ${INPUT_FILE} |
| 28 | set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE}) | 46 | # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified |
| 47 | ) | ||
| 48 | set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) | ||
| 49 | endif() | ||
| 50 | # Skip compiling OpenGL-exclusive files to SPIR-V | ||
| 51 | if (NOT ${FILENAME} MATCHES "opengl.*") | ||
| 52 | string(TOUPPER ${SHADER_NAME}_SPV SPIRV_VARIABLE_NAME) | ||
| 53 | set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_spv.h) | ||
| 54 | add_custom_command( | ||
| 55 | OUTPUT | ||
| 56 | ${SPIRV_HEADER_FILE} | ||
| 57 | COMMAND | ||
| 58 | ${GLSLANGVALIDATOR} -V ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} | ||
| 59 | MAIN_DEPENDENCY | ||
| 60 | ${SOURCE_FILE} | ||
| 61 | ) | ||
| 62 | set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE}) | ||
| 63 | endif() | ||
| 29 | endforeach() | 64 | endforeach() |
| 30 | 65 | ||
| 31 | add_custom_target(host_shaders | 66 | add_custom_target(host_shaders |
| 32 | DEPENDS | 67 | DEPENDS |
| 33 | ${SHADER_HEADERS} | 68 | ${SHADER_HEADERS} |
| 34 | SOURCES | 69 | SOURCES |
| 35 | ${SHADER_SOURCES} | 70 | ${SHADER_FILES} |
| 36 | ) | 71 | ) |
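Every non-OpenGL shader is now also compiled to SPIR-V at build time and embedded as a C array whose name follows the TOUPPER line above, e.g. vulkan_present.frag becomes VULKAN_PRESENT_FRAG_SPV in vulkan_present_frag_spv.h. A hypothetical consumer, assuming glslangValidator's --variable-name output is a uint32_t array:

    #include <cstddef>
    #include <cstdint>

    #include "video_core/host_shaders/vulkan_present_frag_spv.h"

    // Hypothetical usage; header and array names follow the rules above.
    const std::uint32_t* spv_code = VULKAN_PRESENT_FRAG_SPV;
    const std::size_t spv_size = sizeof(VULKAN_PRESENT_FRAG_SPV); // in bytes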
diff --git a/src/video_core/host_shaders/block_linear_unswizzle_2d.comp b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp new file mode 100644 index 000000000..a131be79e --- /dev/null +++ b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp | |||
| @@ -0,0 +1,122 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 430 | ||
| 6 | |||
| 7 | #ifdef VULKAN | ||
| 8 | |||
| 9 | #extension GL_EXT_shader_16bit_storage : require | ||
| 10 | #extension GL_EXT_shader_8bit_storage : require | ||
| 11 | #define HAS_EXTENDED_TYPES 1 | ||
| 12 | #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { | ||
| 13 | #define END_PUSH_CONSTANTS }; | ||
| 14 | #define UNIFORM(n) | ||
| 15 | #define BINDING_SWIZZLE_BUFFER 0 | ||
| 16 | #define BINDING_INPUT_BUFFER 1 | ||
| 17 | #define BINDING_OUTPUT_IMAGE 2 | ||
| 18 | |||
| 19 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||
| 20 | |||
| 21 | #extension GL_NV_gpu_shader5 : enable | ||
| 22 | #ifdef GL_NV_gpu_shader5 | ||
| 23 | #define HAS_EXTENDED_TYPES 1 | ||
| 24 | #else | ||
| 25 | #define HAS_EXTENDED_TYPES 0 | ||
| 26 | #endif | ||
| 27 | #define BEGIN_PUSH_CONSTANTS | ||
| 28 | #define END_PUSH_CONSTANTS | ||
| 29 | #define UNIFORM(n) layout (location = n) uniform | ||
| 30 | #define BINDING_SWIZZLE_BUFFER 0 | ||
| 31 | #define BINDING_INPUT_BUFFER 1 | ||
| 32 | #define BINDING_OUTPUT_IMAGE 0 | ||
| 33 | |||
| 34 | #endif | ||
| 35 | |||
| 36 | BEGIN_PUSH_CONSTANTS | ||
| 37 | UNIFORM(0) uvec3 origin; | ||
| 38 | UNIFORM(1) ivec3 destination; | ||
| 39 | UNIFORM(2) uint bytes_per_block_log2; | ||
| 40 | UNIFORM(3) uint layer_stride; | ||
| 41 | UNIFORM(4) uint block_size; | ||
| 42 | UNIFORM(5) uint x_shift; | ||
| 43 | UNIFORM(6) uint block_height; | ||
| 44 | UNIFORM(7) uint block_height_mask; | ||
| 45 | END_PUSH_CONSTANTS | ||
| 46 | |||
| 47 | layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { | ||
| 48 | uint swizzle_table[]; | ||
| 49 | }; | ||
| 50 | |||
| 51 | #if HAS_EXTENDED_TYPES | ||
| 52 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; | ||
| 53 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; | ||
| 54 | #endif | ||
| 55 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; | ||
| 56 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; | ||
| 57 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; }; | ||
| 58 | |||
| 59 | layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage2DArray output_image; | ||
| 60 | |||
| 61 | layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; | ||
| 62 | |||
| 63 | const uint GOB_SIZE_X = 64; | ||
| 64 | const uint GOB_SIZE_Y = 8; | ||
| 65 | const uint GOB_SIZE_Z = 1; | ||
| 66 | const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; | ||
| 67 | |||
| 68 | const uint GOB_SIZE_X_SHIFT = 6; | ||
| 69 | const uint GOB_SIZE_Y_SHIFT = 3; | ||
| 70 | const uint GOB_SIZE_Z_SHIFT = 0; | ||
| 71 | const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; | ||
| 72 | |||
| 73 | const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); | ||
| 74 | |||
| 75 | uint SwizzleOffset(uvec2 pos) { | ||
| 76 | pos = pos & SWIZZLE_MASK; | ||
| 77 | return swizzle_table[pos.y * 64 + pos.x]; | ||
| 78 | } | ||
| 79 | |||
| 80 | uvec4 ReadTexel(uint offset) { | ||
| 81 | switch (bytes_per_block_log2) { | ||
| 82 | #if HAS_EXTENDED_TYPES | ||
| 83 | case 0: | ||
| 84 | return uvec4(u8data[offset], 0, 0, 0); | ||
| 85 | case 1: | ||
| 86 | return uvec4(u16data[offset / 2], 0, 0, 0); | ||
| 87 | #else | ||
| 88 | case 0: | ||
| 89 | return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); | ||
| 90 | case 1: | ||
| 91 | return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); | ||
| 92 | #endif | ||
| 93 | case 2: | ||
| 94 | return uvec4(u32data[offset / 4], 0, 0, 0); | ||
| 95 | case 3: | ||
| 96 | return uvec4(u64data[offset / 8], 0, 0); | ||
| 97 | case 4: | ||
| 98 | return u128data[offset / 16]; | ||
| 99 | } | ||
| 100 | return uvec4(0); | ||
| 101 | } | ||
| 102 | |||
| 103 | void main() { | ||
| 104 | uvec3 pos = gl_GlobalInvocationID + origin; | ||
| 105 | pos.x <<= bytes_per_block_log2; | ||
| 106 | |||
| 107 | // Read the swizzle table entry as early as possible to hide its memory latency | ||
| 108 | const uint swizzle = SwizzleOffset(pos.xy); | ||
| 109 | |||
| 110 | const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; | ||
| 111 | |||
| 112 | uint offset = 0; | ||
| 113 | offset += pos.z * layer_stride; | ||
| 114 | offset += (block_y >> block_height) * block_size; | ||
| 115 | offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; | ||
| 116 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; | ||
| 117 | offset += swizzle; | ||
| 118 | |||
| 119 | const uvec4 texel = ReadTexel(offset); | ||
| 120 | const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; | ||
| 121 | imageStore(output_image, coord, texel); | ||
| 122 | } | ||
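For reference, the offset arithmetic in main() minus the swizzle term: a GOB is 64 bytes wide and 8 rows tall (512 bytes), hence GOB_SIZE_SHIFT == 9. A CPU-side mirror of the shader's computation (a sketch; SwizzleOffset supplies the final intra-GOB bits):

    // Mirror of the 2D shader's offset math above, swizzle omitted.
    u32 BlockLinearOffset2D(u32 x_bytes, u32 y, u32 z, u32 layer_stride,
                            u32 block_size, u32 x_shift, u32 block_height,
                            u32 block_height_mask) {
        const u32 block_y = y >> 3;                   // GOB_SIZE_Y_SHIFT
        u32 offset = z * layer_stride;
        offset += (block_y >> block_height) * block_size;
        offset += (block_y & block_height_mask) << 9; // GOB_SIZE_SHIFT
        offset += (x_bytes >> 6) << x_shift;          // GOB_SIZE_X_SHIFT
        return offset;                                // + SwizzleOffset(x, y)
    }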
diff --git a/src/video_core/host_shaders/block_linear_unswizzle_3d.comp b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp new file mode 100644 index 000000000..bb6872e6b --- /dev/null +++ b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp | |||
| @@ -0,0 +1,125 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 430 | ||
| 6 | |||
| 7 | #ifdef VULKAN | ||
| 8 | |||
| 9 | #extension GL_EXT_shader_16bit_storage : require | ||
| 10 | #extension GL_EXT_shader_8bit_storage : require | ||
| 11 | #define HAS_EXTENDED_TYPES 1 | ||
| 12 | #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { | ||
| 13 | #define END_PUSH_CONSTANTS }; | ||
| 14 | #define UNIFORM(n) | ||
| 15 | #define BINDING_SWIZZLE_BUFFER 0 | ||
| 16 | #define BINDING_INPUT_BUFFER 1 | ||
| 17 | #define BINDING_OUTPUT_IMAGE 2 | ||
| 18 | |||
| 19 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||
| 20 | |||
| 21 | #extension GL_NV_gpu_shader5 : enable | ||
| 22 | #ifdef GL_NV_gpu_shader5 | ||
| 23 | #define HAS_EXTENDED_TYPES 1 | ||
| 24 | #else | ||
| 25 | #define HAS_EXTENDED_TYPES 0 | ||
| 26 | #endif | ||
| 27 | #define BEGIN_PUSH_CONSTANTS | ||
| 28 | #define END_PUSH_CONSTANTS | ||
| 29 | #define UNIFORM(n) layout (location = n) uniform | ||
| 30 | #define BINDING_SWIZZLE_BUFFER 0 | ||
| 31 | #define BINDING_INPUT_BUFFER 1 | ||
| 32 | #define BINDING_OUTPUT_IMAGE 0 | ||
| 33 | |||
| 34 | #endif | ||
| 35 | |||
| 36 | BEGIN_PUSH_CONSTANTS | ||
| 37 | UNIFORM(0) uvec3 origin; | ||
| 38 | UNIFORM(1) ivec3 destination; | ||
| 39 | UNIFORM(2) uint bytes_per_block_log2; | ||
| 40 | UNIFORM(3) uint slice_size; | ||
| 41 | UNIFORM(4) uint block_size; | ||
| 42 | UNIFORM(5) uint x_shift; | ||
| 43 | UNIFORM(6) uint block_height; | ||
| 44 | UNIFORM(7) uint block_height_mask; | ||
| 45 | UNIFORM(8) uint block_depth; | ||
| 46 | UNIFORM(9) uint block_depth_mask; | ||
| 47 | END_PUSH_CONSTANTS | ||
| 48 | |||
| 49 | layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { | ||
| 50 | uint swizzle_table[]; | ||
| 51 | }; | ||
| 52 | |||
| 53 | #if HAS_EXTENDED_TYPES | ||
| 54 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; | ||
| 55 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; | ||
| 56 | #endif | ||
| 57 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; | ||
| 58 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; | ||
| 59 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; }; | ||
| 60 | |||
| 61 | layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage3D output_image; | ||
| 62 | |||
| 63 | layout(local_size_x = 16, local_size_y = 8, local_size_z = 8) in; | ||
| 64 | |||
| 65 | const uint GOB_SIZE_X = 64; | ||
| 66 | const uint GOB_SIZE_Y = 8; | ||
| 67 | const uint GOB_SIZE_Z = 1; | ||
| 68 | const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; | ||
| 69 | |||
| 70 | const uint GOB_SIZE_X_SHIFT = 6; | ||
| 71 | const uint GOB_SIZE_Y_SHIFT = 3; | ||
| 72 | const uint GOB_SIZE_Z_SHIFT = 0; | ||
| 73 | const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; | ||
| 74 | |||
| 75 | const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); | ||
| 76 | |||
| 77 | uint SwizzleOffset(uvec2 pos) { | ||
| 78 | pos = pos & SWIZZLE_MASK; | ||
| 79 | return swizzle_table[pos.y * 64 + pos.x]; | ||
| 80 | } | ||
| 81 | |||
| 82 | uvec4 ReadTexel(uint offset) { | ||
| 83 | switch (bytes_per_block_log2) { | ||
| 84 | #if HAS_EXTENDED_TYPES | ||
| 85 | case 0: | ||
| 86 | return uvec4(u8data[offset], 0, 0, 0); | ||
| 87 | case 1: | ||
| 88 | return uvec4(u16data[offset / 2], 0, 0, 0); | ||
| 89 | #else | ||
| 90 | case 0: | ||
| 91 | return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); | ||
| 92 | case 1: | ||
| 93 | return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); | ||
| 94 | #endif | ||
| 95 | case 2: | ||
| 96 | return uvec4(u32data[offset / 4], 0, 0, 0); | ||
| 97 | case 3: | ||
| 98 | return uvec4(u64data[offset / 8], 0, 0); | ||
| 99 | case 4: | ||
| 100 | return u128data[offset / 16]; | ||
| 101 | } | ||
| 102 | return uvec4(0); | ||
| 103 | } | ||
| 104 | |||
| 105 | void main() { | ||
| 106 | uvec3 pos = gl_GlobalInvocationID + origin; | ||
| 107 | pos.x <<= bytes_per_block_log2; | ||
| 108 | |||
| 109 | // Read the swizzle table entry as early as possible to hide its memory latency | ||
| 110 | const uint swizzle = SwizzleOffset(pos.xy); | ||
| 111 | |||
| 112 | const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; | ||
| 113 | |||
| 114 | uint offset = 0; | ||
| 115 | offset += (pos.z >> block_depth) * slice_size; | ||
| 116 | offset += (pos.z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height); | ||
| 117 | offset += (block_y >> block_height) * block_size; | ||
| 118 | offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; | ||
| 119 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; | ||
| 120 | offset += swizzle; | ||
| 121 | |||
| 122 | const uvec4 texel = ReadTexel(offset); | ||
| 123 | const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; | ||
| 124 | imageStore(output_image, coord, texel); | ||
| 125 | } | ||
diff --git a/src/video_core/host_shaders/convert_depth_to_float.frag b/src/video_core/host_shaders/convert_depth_to_float.frag new file mode 100644 index 000000000..624c58509 --- /dev/null +++ b/src/video_core/host_shaders/convert_depth_to_float.frag | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 450 | ||
| 6 | |||
| 7 | layout(binding = 0) uniform sampler2D depth_texture; | ||
| 8 | layout(location = 0) out float output_color; | ||
| 9 | |||
| 10 | void main() { | ||
| 11 | ivec2 coord = ivec2(gl_FragCoord.xy); | ||
| 12 | output_color = texelFetch(depth_texture, coord, 0).r; | ||
| 13 | } | ||
diff --git a/src/video_core/host_shaders/convert_float_to_depth.frag b/src/video_core/host_shaders/convert_float_to_depth.frag new file mode 100644 index 000000000..d86c795f4 --- /dev/null +++ b/src/video_core/host_shaders/convert_float_to_depth.frag | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 450 | ||
| 6 | |||
| 7 | layout(binding = 0) uniform sampler2D color_texture; | ||
| 8 | |||
| 9 | void main() { | ||
| 10 | ivec2 coord = ivec2(gl_FragCoord.xy); | ||
| 11 | float color = texelFetch(color_texture, coord, 0).r; | ||
| 12 | gl_FragDepth = color; | ||
| 13 | } | ||
diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert new file mode 100644 index 000000000..452ad6502 --- /dev/null +++ b/src/video_core/host_shaders/full_screen_triangle.vert | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 450 | ||
| 6 | |||
| 7 | #ifdef VULKAN | ||
| 8 | #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { | ||
| 9 | #define END_PUSH_CONSTANTS }; | ||
| 10 | #define UNIFORM(n) | ||
| 11 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||
| 12 | #define BEGIN_PUSH_CONSTANTS | ||
| 13 | #define END_PUSH_CONSTANTS | ||
| 14 | #define UNIFORM(n) layout (location = n) uniform | ||
| 15 | #endif | ||
| 16 | |||
| 17 | BEGIN_PUSH_CONSTANTS | ||
| 18 | UNIFORM(0) vec2 tex_scale; | ||
| 19 | UNIFORM(1) vec2 tex_offset; | ||
| 20 | END_PUSH_CONSTANTS | ||
| 21 | |||
| 22 | layout(location = 0) out vec2 texcoord; | ||
| 23 | |||
| 24 | void main() { | ||
| 25 | float x = float((gl_VertexIndex & 1) << 2); | ||
| 26 | float y = float((gl_VertexIndex & 2) << 1); | ||
| 27 | gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); | ||
| 28 | texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset); | ||
| 29 | } | ||
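The vertex trick above needs no vertex buffer: gl_VertexIndex 0, 1, 2 yields x = 0, 4, 0 and y = 0, 0, 4, i.e. clip-space positions (-1, -1), (3, -1) and (-1, 3). That single oversized triangle covers the whole [-1, 1] square without the diagonal seam of a two-triangle quad, and texcoord is rescaled to compensate.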
diff --git a/src/video_core/host_shaders/opengl_copy_bc4.comp b/src/video_core/host_shaders/opengl_copy_bc4.comp new file mode 100644 index 000000000..7b8e20fbe --- /dev/null +++ b/src/video_core/host_shaders/opengl_copy_bc4.comp | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 430 core | ||
| 6 | #extension GL_ARB_gpu_shader_int64 : require | ||
| 7 | |||
| 8 | layout (local_size_x = 4, local_size_y = 4) in; | ||
| 9 | |||
| 10 | layout(binding = 0, rg32ui) readonly uniform uimage3D bc4_input; | ||
| 11 | layout(binding = 1, rgba8ui) writeonly uniform uimage3D bc4_output; | ||
| 12 | |||
| 13 | layout(location = 0) uniform uvec3 src_offset; | ||
| 14 | layout(location = 1) uniform uvec3 dst_offset; | ||
| 15 | |||
| 16 | // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt | ||
| 17 | uint DecompressBlock(uint64_t bits, uvec2 coord) { | ||
| 18 | const uint code_offset = 16 + 3 * (4 * coord.y + coord.x); | ||
| 19 | const uint code = uint(bits >> code_offset) & 7; | ||
| 20 | const uint red0 = uint(bits >> 0) & 0xff; | ||
| 21 | const uint red1 = uint(bits >> 8) & 0xff; | ||
| 22 | if (red0 > red1) { | ||
| 23 | switch (code) { | ||
| 24 | case 0: | ||
| 25 | return red0; | ||
| 26 | case 1: | ||
| 27 | return red1; | ||
| 28 | case 2: | ||
| 29 | return (6 * red0 + 1 * red1) / 7; | ||
| 30 | case 3: | ||
| 31 | return (5 * red0 + 2 * red1) / 7; | ||
| 32 | case 4: | ||
| 33 | return (4 * red0 + 3 * red1) / 7; | ||
| 34 | case 5: | ||
| 35 | return (3 * red0 + 4 * red1) / 7; | ||
| 36 | case 6: | ||
| 37 | return (2 * red0 + 5 * red1) / 7; | ||
| 38 | case 7: | ||
| 39 | return (1 * red0 + 6 * red1) / 7; | ||
| 40 | } | ||
| 41 | } else { | ||
| 42 | switch (code) { | ||
| 43 | case 0: | ||
| 44 | return red0; | ||
| 45 | case 1: | ||
| 46 | return red1; | ||
| 47 | case 2: | ||
| 48 | return (4 * red0 + 1 * red1) / 5; | ||
| 49 | case 3: | ||
| 50 | return (3 * red0 + 2 * red1) / 5; | ||
| 51 | case 4: | ||
| 52 | return (2 * red0 + 3 * red1) / 5; | ||
| 53 | case 5: | ||
| 54 | return (1 * red0 + 4 * red1) / 5; | ||
| 55 | case 6: | ||
| 56 | return 0; | ||
| 57 | case 7: | ||
| 58 | return 0xff; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | return 0; | ||
| 62 | } | ||
| 63 | |||
| 64 | void main() { | ||
| 65 | uvec2 packed_bits = imageLoad(bc4_input, ivec3(gl_WorkGroupID + src_offset)).rg; | ||
| 66 | uint64_t bits = packUint2x32(packed_bits); | ||
| 67 | uint red = DecompressBlock(bits, gl_LocalInvocationID.xy); | ||
| 68 | uvec4 color = uvec4(red & 0xff, 0, 0, 0xff); | ||
| 69 | imageStore(bc4_output, ivec3(gl_GlobalInvocationID + dst_offset), color); | ||
| 70 | } | ||
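A worked example of the eight-point interpolation above: with red0 = 180, red1 = 60 (so red0 > red1) and code = 3, the block decodes to (5 * 180 + 2 * 60) / 7 = 1020 / 7 = 145. When red0 <= red1 the five-point table applies instead, with codes 6 and 7 pinned to 0 and 0xff.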
diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag index 8a4cb024b..84b818227 100644 --- a/src/video_core/host_shaders/opengl_present.frag +++ b/src/video_core/host_shaders/opengl_present.frag | |||
| @@ -1,3 +1,7 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 1 | #version 430 core | 5 | #version 430 core |
| 2 | 6 | ||
| 3 | layout (location = 0) in vec2 frag_tex_coord; | 7 | layout (location = 0) in vec2 frag_tex_coord; |
diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert index 2235d31a4..c3b5adbba 100644 --- a/src/video_core/host_shaders/opengl_present.vert +++ b/src/video_core/host_shaders/opengl_present.vert | |||
| @@ -1,3 +1,7 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 1 | #version 430 core | 5 | #version 430 core |
| 2 | 6 | ||
| 3 | out gl_PerVertex { | 7 | out gl_PerVertex { |
diff --git a/src/video_core/host_shaders/pitch_unswizzle.comp b/src/video_core/host_shaders/pitch_unswizzle.comp new file mode 100644 index 000000000..cb48ec170 --- /dev/null +++ b/src/video_core/host_shaders/pitch_unswizzle.comp | |||
| @@ -0,0 +1,86 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 430 | ||
| 6 | |||
| 7 | #ifdef VULKAN | ||
| 8 | |||
| 9 | #extension GL_EXT_shader_16bit_storage : require | ||
| 10 | #extension GL_EXT_shader_8bit_storage : require | ||
| 11 | #define HAS_EXTENDED_TYPES 1 | ||
| 12 | #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { | ||
| 13 | #define END_PUSH_CONSTANTS }; | ||
| 14 | #define UNIFORM(n) | ||
| 15 | #define BINDING_INPUT_BUFFER 0 | ||
| 16 | #define BINDING_OUTPUT_IMAGE 1 | ||
| 17 | |||
| 18 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||
| 19 | |||
| 20 | #extension GL_NV_gpu_shader5 : enable | ||
| 21 | #ifdef GL_NV_gpu_shader5 | ||
| 22 | #define HAS_EXTENDED_TYPES 1 | ||
| 23 | #else | ||
| 24 | #define HAS_EXTENDED_TYPES 0 | ||
| 25 | #endif | ||
| 26 | #define BEGIN_PUSH_CONSTANTS | ||
| 27 | #define END_PUSH_CONSTANTS | ||
| 28 | #define UNIFORM(n) layout (location = n) uniform | ||
| 29 | #define BINDING_INPUT_BUFFER 0 | ||
| 30 | #define BINDING_OUTPUT_IMAGE 0 | ||
| 31 | |||
| 32 | #endif | ||
| 33 | |||
| 34 | BEGIN_PUSH_CONSTANTS | ||
| 35 | UNIFORM(0) uvec2 origin; | ||
| 36 | UNIFORM(1) ivec2 destination; | ||
| 37 | UNIFORM(2) uint bytes_per_block; | ||
| 38 | UNIFORM(3) uint pitch; | ||
| 39 | END_PUSH_CONSTANTS | ||
| 40 | |||
| 41 | #if HAS_EXTENDED_TYPES | ||
| 42 | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU8 { uint8_t u8data[]; }; | ||
| 43 | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU16 { uint16_t u16data[]; }; | ||
| 44 | #endif | ||
| 45 | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uint u32data[]; }; | ||
| 46 | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU64 { uvec2 u64data[]; }; | ||
| 47 | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU128 { uvec4 u128data[]; }; | ||
| 48 | |||
| 49 | layout(binding = BINDING_OUTPUT_IMAGE) writeonly uniform uimage2D output_image; | ||
| 50 | |||
| 51 | layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; | ||
| 52 | |||
| 53 | uvec4 ReadTexel(uint offset) { | ||
| 54 | switch (bytes_per_block) { | ||
| 55 | #if HAS_EXTENDED_TYPES | ||
| 56 | case 1: | ||
| 57 | return uvec4(u8data[offset], 0, 0, 0); | ||
| 58 | case 2: | ||
| 59 | return uvec4(u16data[offset / 2], 0, 0, 0); | ||
| 60 | #else | ||
| 61 | case 1: | ||
| 62 | return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); | ||
| 63 | case 2: | ||
| 64 | return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); | ||
| 65 | #endif | ||
| 66 | case 4: | ||
| 67 | return uvec4(u32data[offset / 4], 0, 0, 0); | ||
| 68 | case 8: | ||
| 69 | return uvec4(u64data[offset / 8], 0, 0); | ||
| 70 | case 16: | ||
| 71 | return u128data[offset / 16]; | ||
| 72 | } | ||
| 73 | return uvec4(0); | ||
| 74 | } | ||
| 75 | |||
| 76 | void main() { | ||
| 77 | uvec2 pos = gl_GlobalInvocationID.xy + origin; | ||
| 78 | |||
| 79 | uint offset = 0; | ||
| 80 | offset += pos.x * bytes_per_block; | ||
| 81 | offset += pos.y * pitch; | ||
| 82 | |||
| 83 | const uvec4 texel = ReadTexel(offset); | ||
| 84 | const ivec2 coord = ivec2(gl_GlobalInvocationID.xy) + destination; | ||
| 85 | imageStore(output_image, coord, texel); | ||
| 86 | } | ||
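The pitch-linear case needs no swizzle table: the byte offset is simply y * pitch + x * bytes_per_block. For example, with pitch = 1024 and bytes_per_block = 4, the texel at (10, 2) is read from offset 2 * 1024 + 10 * 4 = 2088.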
diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/vulkan_blit_color_float.frag new file mode 100644 index 000000000..4a6aae410 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_blit_color_float.frag | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 450 | ||
| 6 | |||
| 7 | layout(binding = 0) uniform sampler2D tex; | ||
| 8 | |||
| 9 | layout(location = 0) in vec2 texcoord; | ||
| 10 | layout(location = 0) out vec4 color; | ||
| 11 | |||
| 12 | void main() { | ||
| 13 | color = textureLod(tex, texcoord, 0); | ||
| 14 | } | ||
diff --git a/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag new file mode 100644 index 000000000..19bb23a5a --- /dev/null +++ b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag | |||
| @@ -0,0 +1,16 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 450 | ||
| 6 | #extension GL_ARB_shader_stencil_export : require | ||
| 7 | |||
| 8 | layout(binding = 0) uniform sampler2D depth_tex; | ||
| 9 | layout(binding = 1) uniform isampler2D stencil_tex; | ||
| 10 | |||
| 11 | layout(location = 0) in vec2 texcoord; | ||
| 12 | |||
| 13 | void main() { | ||
| 14 | gl_FragDepth = textureLod(depth_tex, texcoord, 0).r; | ||
| 15 | gl_FragStencilRefARB = textureLod(stencil_tex, texcoord, 0).r; | ||
| 16 | } | ||
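
[Editor's note] Writing gl_FragStencilRefARB requires GL_ARB_shader_stencil_export in GLSL, which on the Vulkan side corresponds to the VK_EXT_shader_stencil_export device extension; a backend presumably has to verify support before taking this blit path. A hedged, standalone C++ check (not code from this change):

    #include <cstring>
    #include <vector>
    #include <vulkan/vulkan.h>

    // Returns true when the device can run shaders that export the stencil
    // reference value, i.e. when the blit shader above is usable.
    bool SupportsShaderStencilExport(VkPhysicalDevice physical_device) {
        uint32_t count = 0;
        vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &count, nullptr);
        std::vector<VkExtensionProperties> properties(count);
        vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &count,
                                             properties.data());
        for (const VkExtensionProperties& prop : properties) {
            if (std::strcmp(prop.extensionName,
                            VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME) == 0) {
                return true;
            }
        }
        return false;
    }
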
diff --git a/src/video_core/renderer_vulkan/shaders/blit.frag b/src/video_core/host_shaders/vulkan_present.frag index a06ecd24a..0979ff3e6 100644 --- a/src/video_core/renderer_vulkan/shaders/blit.frag +++ b/src/video_core/host_shaders/vulkan_present.frag | |||
| @@ -2,15 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | 5 | #version 460 core |
| 15 | 6 | ||
| 16 | layout (location = 0) in vec2 frag_tex_coord; | 7 | layout (location = 0) in vec2 frag_tex_coord; |
diff --git a/src/video_core/renderer_vulkan/shaders/blit.vert b/src/video_core/host_shaders/vulkan_present.vert index c64d9235a..00b868958 100644 --- a/src/video_core/renderer_vulkan/shaders/blit.vert +++ b/src/video_core/host_shaders/vulkan_present.vert | |||
| @@ -2,15 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | 5 | #version 460 core |
| 15 | 6 | ||
| 16 | layout (location = 0) in vec2 vert_position; | 7 | layout (location = 0) in vec2 vert_position; |
diff --git a/src/video_core/renderer_vulkan/shaders/quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp index 5a5703308..212f4e998 100644 --- a/src/video_core/renderer_vulkan/shaders/quad_array.comp +++ b/src/video_core/host_shaders/vulkan_quad_array.comp | |||
| @@ -2,15 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | 5 | #version 460 core |
| 15 | 6 | ||
| 16 | layout (local_size_x = 1024) in; | 7 | layout (local_size_x = 1024) in; |
diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/host_shaders/vulkan_quad_indexed.comp index 5a472ba9b..8655591d0 100644 --- a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp +++ b/src/video_core/host_shaders/vulkan_quad_indexed.comp | |||
| @@ -2,15 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V quad_indexed.comp -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | 5 | #version 460 core |
| 15 | 6 | ||
| 16 | layout (local_size_x = 1024) in; | 7 | layout (local_size_x = 1024) in; |
diff --git a/src/video_core/renderer_vulkan/shaders/uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp index a320f3ae0..ad74d7af9 100644 --- a/src/video_core/renderer_vulkan/shaders/uint8.comp +++ b/src/video_core/host_shaders/vulkan_uint8.comp | |||
| @@ -2,15 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | 5 | #version 460 core |
| 15 | #extension GL_EXT_shader_16bit_storage : require | 6 | #extension GL_EXT_shader_16bit_storage : require |
| 16 | #extension GL_EXT_shader_8bit_storage : require | 7 | #extension GL_EXT_shader_8bit_storage : require |
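
[Editor's note] All five shaders lose the same hand-run glslangValidator/spirv-opt/xxd recipe, and the files move under host_shaders/, which points at the SPIR-V blobs now being produced and embedded by the build rather than pasted into C++ by hand. For reference, consuming such a blob on the Vulkan side looks roughly like this (a sketch; the function name is hypothetical):

    #include <cstdint>
    #include <vector>
    #include <vulkan/vulkan.h>

    // Wrap an embedded SPIR-V blob in a VkShaderModule. This replaces the old
    // workflow of running xxd by hand and pasting the bytecode into a C++ file.
    VkShaderModule BuildShader(VkDevice device, const std::vector<std::uint32_t>& code) {
        VkShaderModuleCreateInfo ci{};
        ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
        ci.codeSize = code.size() * sizeof(std::uint32_t);
        ci.pCode = code.data();
        VkShaderModule shader_module = VK_NULL_HANDLE;
        vkCreateShaderModule(device, &ci, nullptr, &shader_module);
        return shader_module;
    }
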
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 6e70bd362..65feff588 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -57,7 +57,10 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { | |||
| 57 | } | 57 | } |
| 58 | 58 | ||
| 59 | // Flush and invalidate through the GPU interface, to be asynchronous if possible. | 59 | // Flush and invalidate through the GPU interface, to be asynchronous if possible. |
| 60 | system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size); | 60 | const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr); |
| 61 | ASSERT(cpu_addr); | ||
| 62 | |||
| 63 | rasterizer->UnmapMemory(*cpu_addr, size); | ||
| 61 | 64 | ||
| 62 | UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); | 65 | UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); |
| 63 | } | 66 | } |
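
[Editor's note] Besides routing the unmap through the new rasterizer hook, the change stops dereferencing GpuToCpuAddress blindly: the optional is materialized and asserted before use. A self-contained toy version of that pattern (page table and names are illustrative only):

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <optional>

    using VAddr = std::uint64_t;
    using GPUVAddr = std::uint64_t;

    std::map<GPUVAddr, VAddr> page_table; // toy stand-in for the GPU page table

    std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr) {
        const auto it = page_table.find(gpu_addr);
        if (it == page_table.end()) {
            return std::nullopt;
        }
        return it->second;
    }

    void Unmap(GPUVAddr gpu_addr) {
        // Resolve and assert before use, instead of dereferencing the optional
        // inline as the old code did.
        const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
        assert(cpu_addr.has_value());
        page_table.erase(gpu_addr);
    }
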
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 9da9fb4ff..e69de29bb 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp | |||
| @@ -1,250 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <cstring> | ||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/morton.h" | ||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/textures/decoders.h" | ||
| 12 | |||
| 13 | namespace VideoCore { | ||
| 14 | |||
| 15 | using Surface::GetBytesPerPixel; | ||
| 16 | using Surface::PixelFormat; | ||
| 17 | |||
| 18 | using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*); | ||
| 19 | using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; | ||
| 20 | |||
| 21 | template <bool morton_to_linear, PixelFormat format> | ||
| 22 | static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, | ||
| 23 | u32 tile_width_spacing, u8* buffer, u8* addr) { | ||
| 24 | constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); | ||
| 25 | |||
| 26 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual | ||
| 27 | // pixel values. | ||
| 28 | constexpr u32 tile_size_x{GetDefaultBlockWidth(format)}; | ||
| 29 | constexpr u32 tile_size_y{GetDefaultBlockHeight(format)}; | ||
| 30 | |||
| 31 | if constexpr (morton_to_linear) { | ||
| 32 | Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, | ||
| 33 | stride, height, depth, block_height, block_depth, | ||
| 34 | tile_width_spacing); | ||
| 35 | } else { | ||
| 36 | Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, | ||
| 37 | (height + tile_size_y - 1) / tile_size_y, depth, | ||
| 38 | bytes_per_pixel, bytes_per_pixel, addr, buffer, false, | ||
| 39 | block_height, block_depth, tile_width_spacing); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | static constexpr ConversionArray morton_to_linear_fns = { | ||
| 44 | MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>, | ||
| 45 | MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>, | ||
| 46 | MortonCopy<true, PixelFormat::A8B8G8R8_SINT>, | ||
| 47 | MortonCopy<true, PixelFormat::A8B8G8R8_UINT>, | ||
| 48 | MortonCopy<true, PixelFormat::R5G6B5_UNORM>, | ||
| 49 | MortonCopy<true, PixelFormat::B5G6R5_UNORM>, | ||
| 50 | MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>, | ||
| 51 | MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>, | ||
| 52 | MortonCopy<true, PixelFormat::A2B10G10R10_UINT>, | ||
| 53 | MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>, | ||
| 54 | MortonCopy<true, PixelFormat::R8_UNORM>, | ||
| 55 | MortonCopy<true, PixelFormat::R8_SNORM>, | ||
| 56 | MortonCopy<true, PixelFormat::R8_SINT>, | ||
| 57 | MortonCopy<true, PixelFormat::R8_UINT>, | ||
| 58 | MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>, | ||
| 59 | MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>, | ||
| 60 | MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>, | ||
| 61 | MortonCopy<true, PixelFormat::R16G16B16A16_SINT>, | ||
| 62 | MortonCopy<true, PixelFormat::R16G16B16A16_UINT>, | ||
| 63 | MortonCopy<true, PixelFormat::B10G11R11_FLOAT>, | ||
| 64 | MortonCopy<true, PixelFormat::R32G32B32A32_UINT>, | ||
| 65 | MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>, | ||
| 66 | MortonCopy<true, PixelFormat::BC2_UNORM>, | ||
| 67 | MortonCopy<true, PixelFormat::BC3_UNORM>, | ||
| 68 | MortonCopy<true, PixelFormat::BC4_UNORM>, | ||
| 69 | MortonCopy<true, PixelFormat::BC4_SNORM>, | ||
| 70 | MortonCopy<true, PixelFormat::BC5_UNORM>, | ||
| 71 | MortonCopy<true, PixelFormat::BC5_SNORM>, | ||
| 72 | MortonCopy<true, PixelFormat::BC7_UNORM>, | ||
| 73 | MortonCopy<true, PixelFormat::BC6H_UFLOAT>, | ||
| 74 | MortonCopy<true, PixelFormat::BC6H_SFLOAT>, | ||
| 75 | MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>, | ||
| 76 | MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>, | ||
| 77 | MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>, | ||
| 78 | MortonCopy<true, PixelFormat::R32G32B32A32_SINT>, | ||
| 79 | MortonCopy<true, PixelFormat::R32G32_FLOAT>, | ||
| 80 | MortonCopy<true, PixelFormat::R32G32_SINT>, | ||
| 81 | MortonCopy<true, PixelFormat::R32_FLOAT>, | ||
| 82 | MortonCopy<true, PixelFormat::R16_FLOAT>, | ||
| 83 | MortonCopy<true, PixelFormat::R16_UNORM>, | ||
| 84 | MortonCopy<true, PixelFormat::R16_SNORM>, | ||
| 85 | MortonCopy<true, PixelFormat::R16_UINT>, | ||
| 86 | MortonCopy<true, PixelFormat::R16_SINT>, | ||
| 87 | MortonCopy<true, PixelFormat::R16G16_UNORM>, | ||
| 88 | MortonCopy<true, PixelFormat::R16G16_FLOAT>, | ||
| 89 | MortonCopy<true, PixelFormat::R16G16_UINT>, | ||
| 90 | MortonCopy<true, PixelFormat::R16G16_SINT>, | ||
| 91 | MortonCopy<true, PixelFormat::R16G16_SNORM>, | ||
| 92 | MortonCopy<true, PixelFormat::R32G32B32_FLOAT>, | ||
| 93 | MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>, | ||
| 94 | MortonCopy<true, PixelFormat::R8G8_UNORM>, | ||
| 95 | MortonCopy<true, PixelFormat::R8G8_SNORM>, | ||
| 96 | MortonCopy<true, PixelFormat::R8G8_SINT>, | ||
| 97 | MortonCopy<true, PixelFormat::R8G8_UINT>, | ||
| 98 | MortonCopy<true, PixelFormat::R32G32_UINT>, | ||
| 99 | MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>, | ||
| 100 | MortonCopy<true, PixelFormat::R32_UINT>, | ||
| 101 | MortonCopy<true, PixelFormat::R32_SINT>, | ||
| 102 | MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>, | ||
| 103 | MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>, | ||
| 104 | MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>, | ||
| 105 | MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>, | ||
| 106 | MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>, | ||
| 107 | MortonCopy<true, PixelFormat::BC2_SRGB>, | ||
| 108 | MortonCopy<true, PixelFormat::BC3_SRGB>, | ||
| 109 | MortonCopy<true, PixelFormat::BC7_SRGB>, | ||
| 110 | MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>, | ||
| 111 | MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, | ||
| 112 | MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, | ||
| 113 | MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, | ||
| 114 | MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, | ||
| 115 | MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>, | ||
| 116 | MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, | ||
| 117 | MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>, | ||
| 118 | MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, | ||
| 119 | MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>, | ||
| 120 | MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>, | ||
| 121 | MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>, | ||
| 122 | MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>, | ||
| 123 | MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>, | ||
| 124 | MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>, | ||
| 125 | MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>, | ||
| 126 | MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>, | ||
| 127 | MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>, | ||
| 128 | MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>, | ||
| 129 | MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>, | ||
| 130 | MortonCopy<true, PixelFormat::D32_FLOAT>, | ||
| 131 | MortonCopy<true, PixelFormat::D16_UNORM>, | ||
| 132 | MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>, | ||
| 133 | MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>, | ||
| 134 | MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>, | ||
| 135 | }; | ||
| 136 | |||
| 137 | static constexpr ConversionArray linear_to_morton_fns = { | ||
| 138 | MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>, | ||
| 139 | MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>, | ||
| 140 | MortonCopy<false, PixelFormat::A8B8G8R8_SINT>, | ||
| 141 | MortonCopy<false, PixelFormat::A8B8G8R8_UINT>, | ||
| 142 | MortonCopy<false, PixelFormat::R5G6B5_UNORM>, | ||
| 143 | MortonCopy<false, PixelFormat::B5G6R5_UNORM>, | ||
| 144 | MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>, | ||
| 145 | MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>, | ||
| 146 | MortonCopy<false, PixelFormat::A2B10G10R10_UINT>, | ||
| 147 | MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>, | ||
| 148 | MortonCopy<false, PixelFormat::R8_UNORM>, | ||
| 149 | MortonCopy<false, PixelFormat::R8_SNORM>, | ||
| 150 | MortonCopy<false, PixelFormat::R8_SINT>, | ||
| 151 | MortonCopy<false, PixelFormat::R8_UINT>, | ||
| 152 | MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>, | ||
| 153 | MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>, | ||
| 154 | MortonCopy<false, PixelFormat::R16G16B16A16_SINT>, | ||
| 155 | MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>, | ||
| 156 | MortonCopy<false, PixelFormat::R16G16B16A16_UINT>, | ||
| 157 | MortonCopy<false, PixelFormat::B10G11R11_FLOAT>, | ||
| 158 | MortonCopy<false, PixelFormat::R32G32B32A32_UINT>, | ||
| 159 | MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>, | ||
| 160 | MortonCopy<false, PixelFormat::BC2_UNORM>, | ||
| 161 | MortonCopy<false, PixelFormat::BC3_UNORM>, | ||
| 162 | MortonCopy<false, PixelFormat::BC4_UNORM>, | ||
| 163 | MortonCopy<false, PixelFormat::BC4_SNORM>, | ||
| 164 | MortonCopy<false, PixelFormat::BC5_UNORM>, | ||
| 165 | MortonCopy<false, PixelFormat::BC5_SNORM>, | ||
| 166 | MortonCopy<false, PixelFormat::BC7_UNORM>, | ||
| 167 | MortonCopy<false, PixelFormat::BC6H_UFLOAT>, | ||
| 168 | MortonCopy<false, PixelFormat::BC6H_SFLOAT>, | ||
| 169 | // TODO(Subv): Swizzling ASTC formats is not supported | ||
| 170 | nullptr, | ||
| 171 | MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>, | ||
| 172 | MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>, | ||
| 173 | MortonCopy<false, PixelFormat::R32G32B32A32_SINT>, | ||
| 174 | MortonCopy<false, PixelFormat::R32G32_FLOAT>, | ||
| 175 | MortonCopy<false, PixelFormat::R32G32_SINT>, | ||
| 176 | MortonCopy<false, PixelFormat::R32_FLOAT>, | ||
| 177 | MortonCopy<false, PixelFormat::R16_FLOAT>, | ||
| 178 | MortonCopy<false, PixelFormat::R16_UNORM>, | ||
| 179 | MortonCopy<false, PixelFormat::R16_SNORM>, | ||
| 180 | MortonCopy<false, PixelFormat::R16_UINT>, | ||
| 181 | MortonCopy<false, PixelFormat::R16_SINT>, | ||
| 182 | MortonCopy<false, PixelFormat::R16G16_UNORM>, | ||
| 183 | MortonCopy<false, PixelFormat::R16G16_FLOAT>, | ||
| 184 | MortonCopy<false, PixelFormat::R16G16_UINT>, | ||
| 185 | MortonCopy<false, PixelFormat::R16G16_SINT>, | ||
| 186 | MortonCopy<false, PixelFormat::R16G16_SNORM>, | ||
| 187 | MortonCopy<false, PixelFormat::R32G32B32_FLOAT>, | ||
| 188 | MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>, | ||
| 189 | MortonCopy<false, PixelFormat::R8G8_UNORM>, | ||
| 190 | MortonCopy<false, PixelFormat::R8G8_SNORM>, | ||
| 191 | MortonCopy<false, PixelFormat::R8G8_SINT>, | ||
| 192 | MortonCopy<false, PixelFormat::R8G8_UINT>, | ||
| 193 | MortonCopy<false, PixelFormat::R32G32_UINT>, | ||
| 194 | MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>, | ||
| 195 | MortonCopy<false, PixelFormat::R32_UINT>, | ||
| 196 | MortonCopy<false, PixelFormat::R32_SINT>, | ||
| 197 | nullptr, | ||
| 198 | nullptr, | ||
| 199 | nullptr, | ||
| 200 | MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>, | ||
| 201 | MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>, | ||
| 202 | MortonCopy<false, PixelFormat::BC2_SRGB>, | ||
| 203 | MortonCopy<false, PixelFormat::BC3_SRGB>, | ||
| 204 | MortonCopy<false, PixelFormat::BC7_SRGB>, | ||
| 205 | MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>, | ||
| 206 | nullptr, | ||
| 207 | nullptr, | ||
| 208 | nullptr, | ||
| 209 | nullptr, | ||
| 210 | nullptr, | ||
| 211 | nullptr, | ||
| 212 | nullptr, | ||
| 213 | nullptr, | ||
| 214 | nullptr, | ||
| 215 | nullptr, | ||
| 216 | nullptr, | ||
| 217 | nullptr, | ||
| 218 | nullptr, | ||
| 219 | nullptr, | ||
| 220 | nullptr, | ||
| 221 | nullptr, | ||
| 222 | nullptr, | ||
| 223 | nullptr, | ||
| 224 | MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>, | ||
| 225 | MortonCopy<false, PixelFormat::D32_FLOAT>, | ||
| 226 | MortonCopy<false, PixelFormat::D16_UNORM>, | ||
| 227 | MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>, | ||
| 228 | MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>, | ||
| 229 | MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>, | ||
| 230 | }; | ||
| 231 | |||
| 232 | static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { | ||
| 233 | switch (mode) { | ||
| 234 | case MortonSwizzleMode::MortonToLinear: | ||
| 235 | return morton_to_linear_fns[static_cast<std::size_t>(format)]; | ||
| 236 | case MortonSwizzleMode::LinearToMorton: | ||
| 237 | return linear_to_morton_fns[static_cast<std::size_t>(format)]; | ||
| 238 | } | ||
| 239 | UNREACHABLE(); | ||
| 240 | return morton_to_linear_fns[static_cast<std::size_t>(format)]; | ||
| 241 | } | ||
| 242 | |||
| 243 | void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, | ||
| 244 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, | ||
| 245 | u8* buffer, u8* addr) { | ||
| 246 | GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, | ||
| 247 | tile_width_spacing, buffer, addr); | ||
| 248 | } | ||
| 249 | |||
| 250 | } // namespace VideoCore | ||
diff --git a/src/video_core/morton.h b/src/video_core/morton.h index b714a7e3f..e69de29bb 100644 --- a/src/video_core/morton.h +++ b/src/video_core/morton.h | |||
| @@ -1,18 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/surface.h" | ||
| 9 | |||
| 10 | namespace VideoCore { | ||
| 11 | |||
| 12 | enum class MortonSwizzleMode { MortonToLinear, LinearToMorton }; | ||
| 13 | |||
| 14 | void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, | ||
| 15 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, | ||
| 16 | u8* buffer, u8* addr); | ||
| 17 | |||
| 18 | } // namespace VideoCore | ||
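
[Editor's note] morton.cpp and morton.h are emptied out entirely; their dispatch is superseded by the rewritten texture cache. The deleted design is still instructive: one template instantiated per (direction, format) pair, collected into constexpr function-pointer tables indexed by pixel format. A reduced sketch of that pattern (two formats only, trimmed signature):

    #include <array>
    #include <cstddef>
    #include <cstdint>

    enum class PixelFormat : std::size_t { A8B8G8R8_UNORM, R8_UNORM, MaxPixelFormat };

    using CopyFn = void (*)(std::uint8_t* dst, const std::uint8_t* src);

    template <bool morton_to_linear, PixelFormat format>
    void MortonCopy(std::uint8_t* dst, const std::uint8_t* src) {
        // The real code (un)swizzles here using the format's bytes-per-pixel
        // and block dimensions; omitted in this sketch.
    }

    constexpr std::array<CopyFn, static_cast<std::size_t>(PixelFormat::MaxPixelFormat)>
        morton_to_linear_fns{
            MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>,
            MortonCopy<true, PixelFormat::R8_UNORM>,
        };

    void MortonSwizzle(PixelFormat format, std::uint8_t* dst, const std::uint8_t* src) {
        morton_to_linear_fns[static_cast<std::size_t>(format)](dst, src);
    }

The nullptr entries in the deleted linear-to-morton table marked formats (notably ASTC) with no swizzling support, which callers had to guard against.
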
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 27ef4c69a..0cb0f387d 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -76,6 +76,9 @@ public: | |||
| 76 | /// Sync memory between guest and host. | 76 | /// Sync memory between guest and host. |
| 77 | virtual void SyncGuestHost() = 0; | 77 | virtual void SyncGuestHost() = 0; |
| 78 | 78 | ||
| 79 | /// Unmap memory range | ||
| 80 | virtual void UnmapMemory(VAddr addr, u64 size) = 0; | ||
| 81 | |||
| 79 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 82 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 80 | /// and invalidated | 83 | /// and invalidated |
| 81 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; | 84 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; |
| @@ -83,6 +86,12 @@ public: | |||
| 83 | /// Notify the host renderer to wait for previous primitive and compute operations. | 86 | /// Notify the host renderer to wait for previous primitive and compute operations. |
| 84 | virtual void WaitForIdle() = 0; | 87 | virtual void WaitForIdle() = 0; |
| 85 | 88 | ||
| 89 | /// Notify the host renderer to wait for reads and writes to render targets and flush caches. | ||
| 90 | virtual void FragmentBarrier() = 0; | ||
| 91 | |||
| 92 | /// Notify the host renderer to make previous render target writes available. | ||
| 93 | virtual void TiledCacheBarrier() = 0; | ||
| 94 | |||
| 86 | /// Notify the rasterizer to send all written commands to the host GPU. | 95 | /// Notify the rasterizer to send all written commands to the host GPU. |
| 87 | virtual void FlushCommands() = 0; | 96 | virtual void FlushCommands() = 0; |
| 88 | 97 | ||
| @@ -91,8 +100,7 @@ public: | |||
| 91 | 100 | ||
| 92 | /// Attempt to use a faster method to perform a surface copy | 101 | /// Attempt to use a faster method to perform a surface copy |
| 93 | [[nodiscard]] virtual bool AccelerateSurfaceCopy( | 102 | [[nodiscard]] virtual bool AccelerateSurfaceCopy( |
| 94 | const Tegra::Engines::Fermi2D::Regs::Surface& src, | 103 | const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst, |
| 95 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | ||
| 96 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 104 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 97 | return false; | 105 | return false; |
| 98 | } | 106 | } |
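
[Editor's note] UnmapMemory gives MemoryManager::Unmap (seen above) a direct path into the caches, while the two new barrier hooks give backends a place to order render-target reads against prior writes. One plausible OpenGL realization of the barriers, as a sketch under the assumption that glMemoryBarrier/glTextureBarrier are the right primitives, not necessarily what the backends in this change do:

    #include <glad/glad.h>

    class Barriers {
    public:
        void FragmentBarrier() {
            // Order later fragment reads after earlier render-target writes.
            glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);
        }

        void TiledCacheBarrier() {
            // Make prior framebuffer writes visible to subsequent texture fetches.
            glTextureBarrier();
        }
    };
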
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 60735d502..5772cad87 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -61,10 +61,9 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst | |||
| 61 | 61 | ||
| 62 | OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, | 62 | OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, |
| 63 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 63 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, |
| 64 | const Device& device_, std::size_t stream_size_) | 64 | const Device& device_, OGLStreamBuffer& stream_buffer_, |
| 65 | : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, | 65 | StateTracker& state_tracker) |
| 66 | std::make_unique<OGLStreamBuffer>(device_, stream_size_, true)}, | 66 | : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} { |
| 67 | device{device_} { | ||
| 68 | if (!device.HasFastBufferSubData()) { | 67 | if (!device.HasFastBufferSubData()) { |
| 69 | return; | 68 | return; |
| 70 | } | 69 | } |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 95251e26b..17ee90316 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -22,6 +22,7 @@ namespace OpenGL { | |||
| 22 | class Device; | 22 | class Device; |
| 23 | class OGLStreamBuffer; | 23 | class OGLStreamBuffer; |
| 24 | class RasterizerOpenGL; | 24 | class RasterizerOpenGL; |
| 25 | class StateTracker; | ||
| 25 | 26 | ||
| 26 | class Buffer : public VideoCommon::BufferBlock { | 27 | class Buffer : public VideoCommon::BufferBlock { |
| 27 | public: | 28 | public: |
| @@ -52,9 +53,10 @@ private: | |||
| 52 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; | 53 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; |
| 53 | class OGLBufferCache final : public GenericBufferCache { | 54 | class OGLBufferCache final : public GenericBufferCache { |
| 54 | public: | 55 | public: |
| 55 | explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, | 56 | explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, |
| 56 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 57 | Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, |
| 57 | const Device& device_, std::size_t stream_size_); | 58 | const Device& device, OGLStreamBuffer& stream_buffer, |
| 59 | StateTracker& state_tracker); | ||
| 58 | ~OGLBufferCache(); | 60 | ~OGLBufferCache(); |
| 59 | 61 | ||
| 60 | BufferInfo GetEmptyBuffer(std::size_t) override; | 62 | BufferInfo GetEmptyBuffer(std::size_t) override; |
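
[Editor's note] The constructor now receives an OGLStreamBuffer& instead of a stream size, moving stream-buffer ownership out of the cache so the rasterizer can own (and share) it. A minimal sketch of the two ownership shapes, with hypothetical types:

    #include <cstddef>
    #include <memory>

    struct StreamBuffer {
        explicit StreamBuffer(std::size_t size_) : size{size_} {}
        std::size_t size;
    };

    // Before: the cache built and owned its own stream buffer.
    struct OwningCache {
        explicit OwningCache(std::size_t stream_size)
            : stream_buffer{std::make_unique<StreamBuffer>(stream_size)} {}
        std::unique_ptr<StreamBuffer> stream_buffer;
    };

    // After: the caller owns the buffer and the cache only keeps a reference,
    // so several consumers can share one allocation.
    struct InjectedCache {
        explicit InjectedCache(StreamBuffer& stream_buffer_) : stream_buffer{stream_buffer_} {}
        StreamBuffer& stream_buffer;
    };
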
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a94e4f72e..b24179d59 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -5,9 +5,11 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <cstdlib> | ||
| 8 | #include <cstring> | 9 | #include <cstring> |
| 9 | #include <limits> | 10 | #include <limits> |
| 10 | #include <optional> | 11 | #include <optional> |
| 12 | #include <span> | ||
| 11 | #include <vector> | 13 | #include <vector> |
| 12 | 14 | ||
| 13 | #include <glad/glad.h> | 15 | #include <glad/glad.h> |
| @@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1; | |||
| 27 | 29 | ||
| 28 | constexpr u32 NumStages = 5; | 30 | constexpr u32 NumStages = 5; |
| 29 | 31 | ||
| 30 | constexpr std::array LimitUBOs = { | 32 | constexpr std::array LIMIT_UBOS = { |
| 31 | GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, | 33 | GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, |
| 32 | GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, | 34 | GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, |
| 33 | GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS}; | 35 | GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, |
| 34 | 36 | }; | |
| 35 | constexpr std::array LimitSSBOs = { | 37 | constexpr std::array LIMIT_SSBOS = { |
| 36 | GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, | 38 | GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, |
| 37 | GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, | 39 | GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, |
| 38 | GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS}; | 40 | GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, |
| 39 | 41 | }; | |
| 40 | constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, | 42 | constexpr std::array LIMIT_SAMPLERS = { |
| 41 | GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, | 43 | GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, |
| 42 | GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, | 44 | GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, |
| 43 | GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, | 45 | GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, |
| 44 | GL_MAX_TEXTURE_IMAGE_UNITS, | 46 | GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, |
| 45 | GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS}; | 47 | GL_MAX_TEXTURE_IMAGE_UNITS, |
| 46 | 48 | GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, | |
| 47 | constexpr std::array LimitImages = { | 49 | }; |
| 50 | constexpr std::array LIMIT_IMAGES = { | ||
| 48 | GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, | 51 | GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, |
| 49 | GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, | 52 | GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, |
| 50 | GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS}; | 53 | GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, |
| 54 | }; | ||
| 51 | 55 | ||
| 52 | template <typename T> | 56 | template <typename T> |
| 53 | T GetInteger(GLenum pname) { | 57 | T GetInteger(GLenum pname) { |
| @@ -76,8 +80,8 @@ std::vector<std::string_view> GetExtensions() { | |||
| 76 | return extensions; | 80 | return extensions; |
| 77 | } | 81 | } |
| 78 | 82 | ||
| 79 | bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) { | 83 | bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) { |
| 80 | return std::find(images.begin(), images.end(), extension) != images.end(); | 84 | return std::ranges::find(extensions, extension) != extensions.end(); |
| 81 | } | 85 | } |
| 82 | 86 | ||
| 83 | u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { | 87 | u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { |
| @@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { | |||
| 91 | 95 | ||
| 92 | std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { | 96 | std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { |
| 93 | std::array<u32, Tegra::Engines::MaxShaderTypes> max; | 97 | std::array<u32, Tegra::Engines::MaxShaderTypes> max; |
| 94 | std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(), | 98 | std::ranges::transform(LIMIT_UBOS, max.begin(), |
| 95 | [](GLenum pname) { return GetInteger<u32>(pname); }); | 99 | [](GLenum pname) { return GetInteger<u32>(pname); }); |
| 96 | return max; | 100 | return max; |
| 97 | } | 101 | } |
| 98 | 102 | ||
| @@ -115,9 +119,10 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin | |||
| 115 | for (std::size_t i = 0; i < NumStages; ++i) { | 119 | for (std::size_t i = 0; i < NumStages; ++i) { |
| 116 | const std::size_t stage = stage_swizzle[i]; | 120 | const std::size_t stage = stage_swizzle[i]; |
| 117 | bindings[stage] = { | 121 | bindings[stage] = { |
| 118 | Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]), | 122 | Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), |
| 119 | Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]), | 123 | Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), |
| 120 | Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])}; | 124 | Extract(base_samplers, num_samplers, total_samplers / NumStages, |
| 125 | LIMIT_SAMPLERS[stage])}; | ||
| 121 | } | 126 | } |
| 122 | 127 | ||
| 123 | u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); | 128 | u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); |
| @@ -130,7 +135,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin | |||
| 130 | 135 | ||
| 131 | // Reserve at least 4 image bindings on the fragment stage. | 136 | // Reserve at least 4 image bindings on the fragment stage. |
| 132 | bindings[4].image = | 137 | bindings[4].image = |
| 133 | Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]); | 138 | Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); |
| 134 | 139 | ||
| 135 | // This is guaranteed to be at least 1. | 140 | // This is guaranteed to be at least 1. |
| 136 | const u32 total_extracted_images = num_images / (NumStages - 1); | 141 | const u32 total_extracted_images = num_images / (NumStages - 1); |
| @@ -142,7 +147,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin | |||
| 142 | continue; | 147 | continue; |
| 143 | } | 148 | } |
| 144 | bindings[stage].image = | 149 | bindings[stage].image = |
| 145 | Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); | 150 | Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); |
| 146 | } | 151 | } |
| 147 | 152 | ||
| 148 | // Compute doesn't care about any of this. | 153 | // Compute doesn't care about any of this. |
| @@ -188,6 +193,11 @@ bool IsASTCSupported() { | |||
| 188 | return true; | 193 | return true; |
| 189 | } | 194 | } |
| 190 | 195 | ||
| 196 | [[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) { | ||
| 197 | const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | ||
| 198 | return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); | ||
| 199 | } | ||
| 200 | |||
| 191 | } // Anonymous namespace | 201 | } // Anonymous namespace |
| 192 | 202 | ||
| 193 | Device::Device() | 203 | Device::Device() |
| @@ -206,9 +216,8 @@ Device::Device() | |||
| 206 | "Beta driver 443.24 is known to have issues. There might be performance issues."); | 216 | "Beta driver 443.24 is known to have issues. There might be performance issues."); |
| 207 | disable_fast_buffer_sub_data = true; | 217 | disable_fast_buffer_sub_data = true; |
| 208 | } | 218 | } |
| 209 | 219 | uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | |
| 210 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 220 | shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| 211 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | ||
| 212 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 221 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| 213 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | 222 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |
| 214 | max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); | 223 | max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); |
| @@ -224,6 +233,7 @@ Device::Device() | |||
| 224 | has_precise_bug = TestPreciseBug(); | 233 | has_precise_bug = TestPreciseBug(); |
| 225 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; | 234 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; |
| 226 | has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; | 235 | has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; |
| 236 | has_debugging_tool_attached = IsDebugToolAttached(extensions); | ||
| 227 | 237 | ||
| 228 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive | 238 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive |
| 229 | // uniform buffers as "push constants" | 239 | // uniform buffers as "push constants" |
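
[Editor's note] Two small API notes on the device changes: HasExtension now takes std::span, so any contiguous range of string views works without copying into a std::vector, and debug-tool detection falls back to Nsight's environment variables when GL_EXT_debug_tool is absent. A compilable usage sketch of the span-based lookup:

    #include <algorithm>
    #include <array>
    #include <span>
    #include <string_view>

    bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
        return std::ranges::find(extensions, extension) != extensions.end();
    }

    int main() {
        // Works on a std::array (or C array, or vector) without conversion.
        constexpr std::array<std::string_view, 2> extensions{"GL_EXT_debug_tool",
                                                             "GL_NV_viewport_array2"};
        return HasExtension(extensions, "GL_EXT_debug_tool") ? 0 : 1;
    }
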
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8a4b6b9fc..13e66846c 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -36,11 +36,11 @@ public: | |||
| 36 | return GetBaseBindings(static_cast<std::size_t>(shader_type)); | 36 | return GetBaseBindings(static_cast<std::size_t>(shader_type)); |
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | std::size_t GetUniformBufferAlignment() const { | 39 | size_t GetUniformBufferAlignment() const { |
| 40 | return uniform_buffer_alignment; | 40 | return uniform_buffer_alignment; |
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | std::size_t GetShaderStorageBufferAlignment() const { | 43 | size_t GetShaderStorageBufferAlignment() const { |
| 44 | return shader_storage_alignment; | 44 | return shader_storage_alignment; |
| 45 | } | 45 | } |
| 46 | 46 | ||
| @@ -104,6 +104,10 @@ public: | |||
| 104 | return has_nv_viewport_array2; | 104 | return has_nv_viewport_array2; |
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | bool HasDebuggingToolAttached() const { | ||
| 108 | return has_debugging_tool_attached; | ||
| 109 | } | ||
| 110 | |||
| 107 | bool UseAssemblyShaders() const { | 111 | bool UseAssemblyShaders() const { |
| 108 | return use_assembly_shaders; | 112 | return use_assembly_shaders; |
| 109 | } | 113 | } |
| @@ -118,8 +122,8 @@ private: | |||
| 118 | 122 | ||
| 119 | std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; | 123 | std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; |
| 120 | std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; | 124 | std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; |
| 121 | std::size_t uniform_buffer_alignment{}; | 125 | size_t uniform_buffer_alignment{}; |
| 122 | std::size_t shader_storage_alignment{}; | 126 | size_t shader_storage_alignment{}; |
| 123 | u32 max_vertex_attributes{}; | 127 | u32 max_vertex_attributes{}; |
| 124 | u32 max_varyings{}; | 128 | u32 max_varyings{}; |
| 125 | u32 max_compute_shared_memory_size{}; | 129 | u32 max_compute_shared_memory_size{}; |
| @@ -135,6 +139,7 @@ private: | |||
| 135 | bool has_precise_bug{}; | 139 | bool has_precise_bug{}; |
| 136 | bool has_fast_buffer_sub_data{}; | 140 | bool has_fast_buffer_sub_data{}; |
| 137 | bool has_nv_viewport_array2{}; | 141 | bool has_nv_viewport_array2{}; |
| 142 | bool has_debugging_tool_attached{}; | ||
| 138 | bool use_assembly_shaders{}; | 143 | bool use_assembly_shaders{}; |
| 139 | bool use_asynchronous_shaders{}; | 144 | bool use_asynchronous_shaders{}; |
| 140 | }; | 145 | }; |
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 6040646cb..3e9c922f5 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp | |||
| @@ -46,7 +46,7 @@ void GLInnerFence::Wait() { | |||
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, | 48 | FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, |
| 49 | Tegra::GPU& gpu_, TextureCacheOpenGL& texture_cache_, | 49 | Tegra::GPU& gpu_, TextureCache& texture_cache_, |
| 50 | OGLBufferCache& buffer_cache_, QueryCache& query_cache_) | 50 | OGLBufferCache& buffer_cache_, QueryCache& query_cache_) |
| 51 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} | 51 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} |
| 52 | 52 | ||
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index 39ca6125b..30dbee613 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h | |||
| @@ -33,12 +33,12 @@ private: | |||
| 33 | 33 | ||
| 34 | using Fence = std::shared_ptr<GLInnerFence>; | 34 | using Fence = std::shared_ptr<GLInnerFence>; |
| 35 | using GenericFenceManager = | 35 | using GenericFenceManager = |
| 36 | VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>; | 36 | VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>; |
| 37 | 37 | ||
| 38 | class FenceManagerOpenGL final : public GenericFenceManager { | 38 | class FenceManagerOpenGL final : public GenericFenceManager { |
| 39 | public: | 39 | public: |
| 40 | explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 40 | explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, |
| 41 | TextureCacheOpenGL& texture_cache_, OGLBufferCache& buffer_cache_, | 41 | TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, |
| 42 | QueryCache& query_cache_); | 42 | QueryCache& query_cache_); |
| 43 | 43 | ||
| 44 | protected: | 44 | protected: |
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp deleted file mode 100644 index b8a512cb6..000000000 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp +++ /dev/null | |||
| @@ -1,85 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | #include <unordered_map> | ||
| 7 | #include <utility> | ||
| 8 | |||
| 9 | #include <glad/glad.h> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | ||
| 14 | |||
| 15 | namespace OpenGL { | ||
| 16 | |||
| 17 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 18 | using VideoCore::Surface::SurfaceType; | ||
| 19 | |||
| 20 | FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default; | ||
| 21 | |||
| 22 | FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default; | ||
| 23 | |||
| 24 | GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) { | ||
| 25 | const auto [entry, is_cache_miss] = cache.try_emplace(key); | ||
| 26 | auto& framebuffer{entry->second}; | ||
| 27 | if (is_cache_miss) { | ||
| 28 | framebuffer = CreateFramebuffer(key); | ||
| 29 | } | ||
| 30 | return framebuffer.handle; | ||
| 31 | } | ||
| 32 | |||
| 33 | OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) { | ||
| 34 | OGLFramebuffer framebuffer; | ||
| 35 | framebuffer.Create(); | ||
| 36 | |||
| 37 | // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. | ||
| 38 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); | ||
| 39 | |||
| 40 | if (key.zeta) { | ||
| 41 | const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil; | ||
| 42 | const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT; | ||
| 43 | key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER); | ||
| 44 | } | ||
| 45 | |||
| 46 | std::size_t num_buffers = 0; | ||
| 47 | std::array<GLenum, Maxwell::NumRenderTargets> targets; | ||
| 48 | |||
| 49 | for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | ||
| 50 | if (!key.colors[index]) { | ||
| 51 | targets[index] = GL_NONE; | ||
| 52 | continue; | ||
| 53 | } | ||
| 54 | const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index); | ||
| 55 | key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER); | ||
| 56 | |||
| 57 | const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111; | ||
| 58 | targets[index] = GL_COLOR_ATTACHMENT0 + attachment; | ||
| 59 | num_buffers = index + 1; | ||
| 60 | } | ||
| 61 | |||
| 62 | if (num_buffers > 0) { | ||
| 63 | glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets)); | ||
| 64 | } else { | ||
| 65 | glDrawBuffer(GL_NONE); | ||
| 66 | } | ||
| 67 | |||
| 68 | return framebuffer; | ||
| 69 | } | ||
| 70 | |||
| 71 | std::size_t FramebufferCacheKey::Hash() const noexcept { | ||
| 72 | std::size_t hash = std::hash<View>{}(zeta); | ||
| 73 | for (const auto& color : colors) { | ||
| 74 | hash ^= std::hash<View>{}(color); | ||
| 75 | } | ||
| 76 | hash ^= static_cast<std::size_t>(color_attachments) << 16; | ||
| 77 | return hash; | ||
| 78 | } | ||
| 79 | |||
| 80 | bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept { | ||
| 81 | return std::tie(colors, zeta, color_attachments) == | ||
| 82 | std::tie(rhs.colors, rhs.zeta, rhs.color_attachments); | ||
| 83 | } | ||
| 84 | |||
| 85 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h deleted file mode 100644 index 8f698fee0..000000000 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h +++ /dev/null | |||
| @@ -1,68 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <unordered_map> | ||
| 10 | |||
| 11 | #include <glad/glad.h> | ||
| 12 | |||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 17 | |||
| 18 | namespace OpenGL { | ||
| 19 | |||
| 20 | constexpr std::size_t BitsPerAttachment = 4; | ||
| 21 | |||
| 22 | struct FramebufferCacheKey { | ||
| 23 | View zeta; | ||
| 24 | std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors; | ||
| 25 | u32 color_attachments = 0; | ||
| 26 | |||
| 27 | std::size_t Hash() const noexcept; | ||
| 28 | |||
| 29 | bool operator==(const FramebufferCacheKey& rhs) const noexcept; | ||
| 30 | |||
| 31 | bool operator!=(const FramebufferCacheKey& rhs) const noexcept { | ||
| 32 | return !operator==(rhs); | ||
| 33 | } | ||
| 34 | |||
| 35 | void SetAttachment(std::size_t index, u32 attachment) { | ||
| 36 | color_attachments |= attachment << (BitsPerAttachment * index); | ||
| 37 | } | ||
| 38 | }; | ||
| 39 | |||
| 40 | } // namespace OpenGL | ||
| 41 | |||
| 42 | namespace std { | ||
| 43 | |||
| 44 | template <> | ||
| 45 | struct hash<OpenGL::FramebufferCacheKey> { | ||
| 46 | std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept { | ||
| 47 | return k.Hash(); | ||
| 48 | } | ||
| 49 | }; | ||
| 50 | |||
| 51 | } // namespace std | ||
| 52 | |||
| 53 | namespace OpenGL { | ||
| 54 | |||
| 55 | class FramebufferCacheOpenGL { | ||
| 56 | public: | ||
| 57 | FramebufferCacheOpenGL(); | ||
| 58 | ~FramebufferCacheOpenGL(); | ||
| 59 | |||
| 60 | GLuint GetFramebuffer(const FramebufferCacheKey& key); | ||
| 61 | |||
| 62 | private: | ||
| 63 | OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); | ||
| 64 | |||
| 65 | std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache; | ||
| 66 | }; | ||
| 67 | |||
| 68 | } // namespace OpenGL | ||
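
[Editor's note] The framebuffer cache is deleted outright; the rewritten texture cache tracks framebuffers itself. The removed code is a textbook example of caching GL objects behind a hashed key, so a trimmed sketch of the pattern is kept here for reference (toy key, with int standing in for the GL handle):

    #include <cstddef>
    #include <functional>
    #include <unordered_map>

    struct Key {
        int zeta = 0;
        int color = 0;

        std::size_t Hash() const noexcept {
            return std::hash<int>{}(zeta) ^ (std::hash<int>{}(color) << 1);
        }
        bool operator==(const Key& rhs) const noexcept {
            return zeta == rhs.zeta && color == rhs.color;
        }
    };

    template <>
    struct std::hash<Key> {
        std::size_t operator()(const Key& k) const noexcept {
            return k.Hash();
        }
    };

    std::unordered_map<Key, int> cache; // try_emplace on lookup, build on miss

The deleted version combined per-attachment view hashes the same way, then keyed OGLFramebuffer objects on the result via try_emplace, building the framebuffer only on a cache miss.
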
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e58e84759..8aa63d329 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -25,12 +25,15 @@ | |||
| 25 | #include "video_core/engines/maxwell_3d.h" | 25 | #include "video_core/engines/maxwell_3d.h" |
| 26 | #include "video_core/engines/shader_type.h" | 26 | #include "video_core/engines/shader_type.h" |
| 27 | #include "video_core/memory_manager.h" | 27 | #include "video_core/memory_manager.h" |
| 28 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 28 | #include "video_core/renderer_opengl/gl_query_cache.h" | 29 | #include "video_core/renderer_opengl/gl_query_cache.h" |
| 29 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 30 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 31 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 32 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 31 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | 33 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 32 | #include "video_core/renderer_opengl/renderer_opengl.h" | 34 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 33 | #include "video_core/shader_cache.h" | 35 | #include "video_core/shader_cache.h" |
| 36 | #include "video_core/texture_cache/texture_cache.h" | ||
| 34 | 37 | ||
| 35 | namespace OpenGL { | 38 | namespace OpenGL { |
| 36 | 39 | ||
| @@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255 | |||
| 55 | 58 | ||
| 56 | namespace { | 59 | namespace { |
| 57 | 60 | ||
| 58 | constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18; | 61 | constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18; |
| 59 | constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = | 62 | constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = |
| 60 | NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; | 63 | NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; |
| 61 | constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = | 64 | constexpr size_t TOTAL_CONST_BUFFER_BYTES = |
| 62 | NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; | 65 | NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; |
| 63 | 66 | ||
| 64 | constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; | 67 | constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; |
| 65 | constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; | 68 | constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; |
| 69 | |||
| 70 | constexpr size_t MAX_TEXTURES = 192; | ||
| 71 | constexpr size_t MAX_IMAGES = 48; | ||
| 72 | |||
| 73 | struct TextureHandle { | ||
| 74 | constexpr TextureHandle(u32 data, bool via_header_index) { | ||
| 75 | const Tegra::Texture::TextureHandle handle{data}; | ||
| 76 | image = handle.tic_id; | ||
| 77 | sampler = via_header_index ? image : handle.tsc_id.Value(); | ||
| 78 | } | ||
| 79 | |||
| 80 | u32 image; | ||
| 81 | u32 sampler; | ||
| 82 | }; | ||
| 66 | 83 | ||
| 67 | template <typename Engine, typename Entry> | 84 | template <typename Engine, typename Entry> |
| 68 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | 85 | TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, |
| 69 | ShaderType shader_type, std::size_t index = 0) { | 86 | ShaderType shader_type, size_t index = 0) { |
| 70 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | 87 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { |
| 71 | if (entry.is_separated) { | 88 | if (entry.is_separated) { |
| 72 | const u32 buffer_1 = entry.buffer; | 89 | const u32 buffer_1 = entry.buffer; |
| @@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry | |||
| 75 | const u32 offset_2 = entry.secondary_offset; | 92 | const u32 offset_2 = entry.secondary_offset; |
| 76 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); | 93 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); |
| 77 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); | 94 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); |
| 78 | return engine.GetTextureInfo(handle_1 | handle_2); | 95 | return TextureHandle(handle_1 | handle_2, via_header_index); |
| 79 | } | 96 | } |
| 80 | } | 97 | } |
| 81 | if (entry.is_bindless) { | 98 | if (entry.is_bindless) { |
| 82 | const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); | 99 | const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); |
| 83 | return engine.GetTextureInfo(handle); | 100 | return TextureHandle(raw, via_header_index); |
| 84 | } | ||
| 85 | |||
| 86 | const auto& gpu_profile = engine.AccessGuestDriverProfile(); | ||
| 87 | const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); | ||
| 88 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { | ||
| 89 | return engine.GetStageTexture(shader_type, offset); | ||
| 90 | } else { | ||
| 91 | return engine.GetTexture(offset); | ||
| 92 | } | 101 | } |
| 102 | const u32 buffer = engine.GetBoundBuffer(); | ||
| 103 | const u64 offset = (entry.offset + index) * sizeof(u32); | ||
| 104 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | ||
| 93 | } | 105 | } |
| 94 | 106 | ||
| 95 | std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | 107 | std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, |
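
[Editor's note] GetTextureInfo now returns a plain TextureHandle instead of a full Tegra::Texture::FullTextureInfo: one 32-bit word is split into a TIC (image descriptor) index and a TSC (sampler) index, and when handles are addressed via header index the sampler index simply mirrors the image index. A standalone sketch of the unpacking, assuming the usual Maxwell layout of a 20-bit TIC index in the low bits and a 12-bit TSC index above it (the exact widths live in Tegra::Texture::TextureHandle):

    #include <cstdint>

    struct UnpackedHandle {
        // Assumed layout: bits 0..19 = TIC (image) index,
        // bits 20..31 = TSC (sampler) index.
        constexpr UnpackedHandle(std::uint32_t data, bool via_header_index)
            : image{data & 0xfffffu},
              sampler{via_header_index ? image : (data >> 20) & 0xfffu} {}

        std::uint32_t image;
        std::uint32_t sampler;
    };

    static_assert(UnpackedHandle{0x00c00005u, false}.image == 5);
    static_assert(UnpackedHandle{0x00c00005u, false}.sampler == 0xc);
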
| @@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | |||
| 97 | if (!entry.IsIndirect()) { | 109 | if (!entry.IsIndirect()) { |
| 98 | return entry.GetSize(); | 110 | return entry.GetSize(); |
| 99 | } | 111 | } |
| 100 | |||
| 101 | if (buffer.size > Maxwell::MaxConstBufferSize) { | 112 | if (buffer.size > Maxwell::MaxConstBufferSize) { |
| 102 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, | 113 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, |
| 103 | Maxwell::MaxConstBufferSize); | 114 | Maxwell::MaxConstBufferSize); |
| @@ -147,23 +158,60 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss | |||
| 147 | reinterpret_cast<const GLuint*>(ssbos)); | 158 | reinterpret_cast<const GLuint*>(ssbos)); |
| 148 | } | 159 | } |
| 149 | 160 | ||
| 161 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | ||
| 162 | if (entry.is_buffer) { | ||
| 163 | return ImageViewType::Buffer; | ||
| 164 | } | ||
| 165 | switch (entry.type) { | ||
| 166 | case Tegra::Shader::TextureType::Texture1D: | ||
| 167 | return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; | ||
| 168 | case Tegra::Shader::TextureType::Texture2D: | ||
| 169 | return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; | ||
| 170 | case Tegra::Shader::TextureType::Texture3D: | ||
| 171 | return ImageViewType::e3D; | ||
| 172 | case Tegra::Shader::TextureType::TextureCube: | ||
| 173 | return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; | ||
| 174 | } | ||
| 175 | UNREACHABLE(); | ||
| 176 | return ImageViewType::e2D; | ||
| 177 | } | ||
| 178 | |||
| 179 | ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { | ||
| 180 | switch (entry.type) { | ||
| 181 | case Tegra::Shader::ImageType::Texture1D: | ||
| 182 | return ImageViewType::e1D; | ||
| 183 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 184 | return ImageViewType::e1DArray; | ||
| 185 | case Tegra::Shader::ImageType::Texture2D: | ||
| 186 | return ImageViewType::e2D; | ||
| 187 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 188 | return ImageViewType::e2DArray; | ||
| 189 | case Tegra::Shader::ImageType::Texture3D: | ||
| 190 | return ImageViewType::e3D; | ||
| 191 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 192 | return ImageViewType::Buffer; | ||
| 193 | } | ||
| 194 | UNREACHABLE(); | ||
| 195 | return ImageViewType::e2D; | ||
| 196 | } | ||
| 197 | |||
| 150 | } // Anonymous namespace | 198 | } // Anonymous namespace |
| 151 | 199 | ||
| 152 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 200 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 153 | Core::Memory::Memory& cpu_memory_, const Device& device_, | 201 | Core::Memory::Memory& cpu_memory_, const Device& device_, |
| 154 | ScreenInfo& screen_info_, ProgramManager& program_manager_, | 202 | ScreenInfo& screen_info_, ProgramManager& program_manager_, |
| 155 | StateTracker& state_tracker_) | 203 | StateTracker& state_tracker_) |
| 156 | : RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()), | 204 | : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()), |
| 157 | kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), | 205 | kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), |
| 158 | screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), | 206 | screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), |
| 159 | texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker), | 207 | stream_buffer(device, state_tracker), |
| 208 | texture_cache_runtime(device, program_manager, state_tracker), | ||
| 209 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | ||
| 160 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), | 210 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), |
| 161 | query_cache(*this, maxwell3d, gpu_memory), | 211 | query_cache(*this, maxwell3d, gpu_memory), |
| 162 | buffer_cache(*this, gpu_memory, cpu_memory_, device, STREAM_BUFFER_SIZE), | 212 | buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker), |
| 163 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | 213 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), |
| 164 | async_shaders(emu_window_) { | 214 | async_shaders(emu_window_) { |
| 165 | CheckExtensions(); | ||
| 166 | |||
| 167 | unified_uniform_buffer.Create(); | 215 | unified_uniform_buffer.Create(); |
| 168 | glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); | 216 | glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); |
| 169 | 217 | ||
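The new ImageViewTypeFromEntry helpers above collapse a shader entry's Tegra texture or image type into the backend-agnostic ImageViewType consumed by the rewritten texture cache, with is_buffer short-circuiting every other field. A minimal usage sketch, assuming the entry type is default-constructible and using the field names shown in the diff:

```cpp
// Illustrative only: a 2D array sampler entry resolves to e2DArray.
SamplerEntry entry{};
entry.is_buffer = false;
entry.is_array = true;
entry.type = Tegra::Shader::TextureType::Texture2D;
const ImageViewType view_type = ImageViewTypeFromEntry(entry); // ImageViewType::e2DArray
```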
| @@ -174,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 174 | nullptr, 0); | 222 | nullptr, 0); |
| 175 | } | 223 | } |
| 176 | } | 224 | } |
| 177 | |||
| 178 | if (device.UseAsynchronousShaders()) { | 225 | if (device.UseAsynchronousShaders()) { |
| 179 | async_shaders.AllocateWorkers(); | 226 | async_shaders.AllocateWorkers(); |
| 180 | } | 227 | } |
| @@ -186,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() { | |||
| 186 | } | 233 | } |
| 187 | } | 234 | } |
| 188 | 235 | ||
| 189 | void RasterizerOpenGL::CheckExtensions() { | ||
| 190 | if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { | ||
| 191 | LOG_WARNING( | ||
| 192 | Render_OpenGL, | ||
| 193 | "Anisotropic filter is not supported! This can cause graphical issues in some games."); | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
| 197 | void RasterizerOpenGL::SetupVertexFormat() { | 236 | void RasterizerOpenGL::SetupVertexFormat() { |
| 198 | auto& flags = maxwell3d.dirty.flags; | 237 | auto& flags = maxwell3d.dirty.flags; |
| 199 | if (!flags[Dirty::VertexFormats]) { | 238 | if (!flags[Dirty::VertexFormats]) { |
| @@ -316,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { | |||
| 316 | return info.offset; | 355 | return info.offset; |
| 317 | } | 356 | } |
| 318 | 357 | ||
| 319 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | 358 | void RasterizerOpenGL::SetupShaders() { |
| 320 | MICROPROFILE_SCOPE(OpenGL_Shader); | 359 | MICROPROFILE_SCOPE(OpenGL_Shader); |
| 321 | u32 clip_distances = 0; | 360 | u32 clip_distances = 0; |
| 322 | 361 | ||
| 362 | std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; | ||
| 363 | image_view_indices.clear(); | ||
| 364 | sampler_handles.clear(); | ||
| 365 | |||
| 366 | texture_cache.SynchronizeGraphicsDescriptors(); | ||
| 367 | |||
| 323 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 368 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 324 | const auto& shader_config = maxwell3d.regs.shader_config[index]; | 369 | const auto& shader_config = maxwell3d.regs.shader_config[index]; |
| 325 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | 370 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; |
| @@ -338,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 338 | } | 383 | } |
| 339 | continue; | 384 | continue; |
| 340 | } | 385 | } |
| 341 | |||
| 342 | // Currently these stages are not supported in the OpenGL backend. | 386 | // Currently these stages are not supported in the OpenGL backend. |
| 343 | // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL | 387 | // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL |
| 344 | if (program == Maxwell::ShaderProgram::TesselationControl || | 388 | if (program == Maxwell::ShaderProgram::TesselationControl || |
| @@ -347,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 347 | } | 391 | } |
| 348 | 392 | ||
| 349 | Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); | 393 | Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); |
| 350 | |||
| 351 | const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; | 394 | const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; |
| 352 | switch (program) { | 395 | switch (program) { |
| 353 | case Maxwell::ShaderProgram::VertexA: | 396 | case Maxwell::ShaderProgram::VertexA: |
| @@ -363,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 363 | default: | 406 | default: |
| 364 | UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, | 407 | UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, |
| 365 | shader_config.enable.Value(), shader_config.offset); | 408 | shader_config.enable.Value(), shader_config.offset); |
| 409 | break; | ||
| 366 | } | 410 | } |
| 367 | 411 | ||
| 368 | // Stage indices are 0-5 | 412 | // Stage indices are 0-5 |
| 369 | const std::size_t stage = index == 0 ? 0 : index - 1; | 413 | const size_t stage = index == 0 ? 0 : index - 1; |
| 414 | shaders[stage] = shader; | ||
| 415 | |||
| 370 | SetupDrawConstBuffers(stage, shader); | 416 | SetupDrawConstBuffers(stage, shader); |
| 371 | SetupDrawGlobalMemory(stage, shader); | 417 | SetupDrawGlobalMemory(stage, shader); |
| 372 | SetupDrawTextures(stage, shader); | 418 | SetupDrawTextures(shader, stage); |
| 373 | SetupDrawImages(stage, shader); | 419 | SetupDrawImages(shader, stage); |
| 374 | 420 | ||
| 375 | // Workaround for Intel drivers. | 421 | // Workaround for Intel drivers. |
| 376 | // When a clip distance is enabled but not set in the shader it crops parts of the screen | 422 | // When a clip distance is enabled but not set in the shader it crops parts of the screen |
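The stage-index computation in this hunk (`index == 0 ? 0 : index - 1`) folds the two vertex programs into a single stage: program indices 0 (VertexA) and 1 (VertexB) both land on stage 0, and the remaining programs shift down by one. An illustrative compile-time check of that mapping:

```cpp
#include <cstddef>

// Not part of the patch; a standalone sanity check of the folding rule.
constexpr std::size_t StageOf(std::size_t index) {
    return index == 0 ? 0 : index - 1;
}
static_assert(StageOf(0) == 0 && StageOf(1) == 0 && StageOf(5) == 4);
```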
| @@ -384,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 384 | ++index; | 430 | ++index; |
| 385 | } | 431 | } |
| 386 | } | 432 | } |
| 387 | |||
| 388 | SyncClipEnabled(clip_distances); | 433 | SyncClipEnabled(clip_distances); |
| 389 | maxwell3d.dirty.flags[Dirty::Shaders] = false; | 434 | maxwell3d.dirty.flags[Dirty::Shaders] = false; |
| 435 | |||
| 436 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 437 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 438 | |||
| 439 | size_t image_view_index = 0; | ||
| 440 | size_t texture_index = 0; | ||
| 441 | size_t image_index = 0; | ||
| 442 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 443 | const Shader* const shader = shaders[stage]; | ||
| 444 | if (shader) { | ||
| 445 | const auto base = device.GetBaseBindings(stage); | ||
| 446 | BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, | ||
| 447 | texture_index, image_index); | ||
| 448 | } | ||
| 449 | } | ||
| 390 | } | 450 | } |
| 391 | 451 | ||
| 392 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | 452 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { |
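SetupShaders now defers all binding: the Setup* passes only gather raw descriptor indices, FillGraphicsImageViews resolves them in one batch, and BindTextures walks the results per stage. The `const std::span indices_span(...)` line relies on C++20 class template argument deduction; a minimal standalone sketch of that deduction:

```cpp
#include <cstdint>
#include <span>
#include <type_traits>
#include <vector>

// The (pointer, size) constructor deduces std::span<std::uint32_t> via CTAD,
// producing a non-owning view over the gathered indices without copying them.
std::vector<std::uint32_t> indices{3, 7, 9};
const std::span indices_span(indices.data(), indices.size());
static_assert(std::is_same_v<decltype(indices_span), const std::span<std::uint32_t>>);
```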
| @@ -417,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s | |||
| 417 | shader_cache.LoadDiskCache(title_id, stop_loading, callback); | 477 | shader_cache.LoadDiskCache(title_id, stop_loading, callback); |
| 418 | } | 478 | } |
| 419 | 479 | ||
| 420 | void RasterizerOpenGL::ConfigureFramebuffers() { | ||
| 421 | MICROPROFILE_SCOPE(OpenGL_Framebuffer); | ||
| 422 | if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) { | ||
| 423 | return; | ||
| 424 | } | ||
| 425 | maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; | ||
| 426 | |||
| 427 | texture_cache.GuardRenderTargets(true); | ||
| 428 | |||
| 429 | View depth_surface = texture_cache.GetDepthBufferSurface(true); | ||
| 430 | |||
| 431 | const auto& regs = maxwell3d.regs; | ||
| 432 | UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); | ||
| 433 | |||
| 434 | // Bind the framebuffer surfaces | ||
| 435 | FramebufferCacheKey key; | ||
| 436 | const auto colors_count = static_cast<std::size_t>(regs.rt_control.count); | ||
| 437 | for (std::size_t index = 0; index < colors_count; ++index) { | ||
| 438 | View color_surface{texture_cache.GetColorBufferSurface(index, true)}; | ||
| 439 | if (!color_surface) { | ||
| 440 | continue; | ||
| 441 | } | ||
| 442 | // Assume that a surface will be written to if it is used as a framebuffer, even | ||
| 443 | // if the shader doesn't actually write to it. | ||
| 444 | texture_cache.MarkColorBufferInUse(index); | ||
| 445 | |||
| 446 | key.SetAttachment(index, regs.rt_control.GetMap(index)); | ||
| 447 | key.colors[index] = std::move(color_surface); | ||
| 448 | } | ||
| 449 | |||
| 450 | if (depth_surface) { | ||
| 451 | // Assume that a surface will be written to if it is used as a framebuffer, even if | ||
| 452 | // the shader doesn't actually write to it. | ||
| 453 | texture_cache.MarkDepthBufferInUse(); | ||
| 454 | key.zeta = std::move(depth_surface); | ||
| 455 | } | ||
| 456 | |||
| 457 | texture_cache.GuardRenderTargets(false); | ||
| 458 | |||
| 459 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); | ||
| 460 | } | ||
| 461 | |||
| 462 | void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) { | ||
| 463 | const auto& regs = maxwell3d.regs; | ||
| 464 | |||
| 465 | texture_cache.GuardRenderTargets(true); | ||
| 466 | View color_surface; | ||
| 467 | |||
| 468 | if (using_color) { | ||
| 469 | // Determine if we have to preserve the contents. | ||
| 470 | // First we have to make sure all clear masks are enabled. | ||
| 471 | bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G || | ||
| 472 | !regs.clear_buffers.B || !regs.clear_buffers.A; | ||
| 473 | const std::size_t index = regs.clear_buffers.RT; | ||
| 474 | if (regs.clear_flags.scissor) { | ||
| 475 | // Then we have to confirm scissor testing clears the whole image. | ||
| 476 | const auto& scissor = regs.scissor_test[0]; | ||
| 477 | preserve_contents |= scissor.min_x > 0; | ||
| 478 | preserve_contents |= scissor.min_y > 0; | ||
| 479 | preserve_contents |= scissor.max_x < regs.rt[index].width; | ||
| 480 | preserve_contents |= scissor.max_y < regs.rt[index].height; | ||
| 481 | } | ||
| 482 | |||
| 483 | color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents); | ||
| 484 | texture_cache.MarkColorBufferInUse(index); | ||
| 485 | } | ||
| 486 | |||
| 487 | View depth_surface; | ||
| 488 | if (using_depth_stencil) { | ||
| 489 | bool preserve_contents = false; | ||
| 490 | if (regs.clear_flags.scissor) { | ||
| 491 | // For depth stencil clears we only have to confirm scissor test covers the whole image. | ||
| 492 | const auto& scissor = regs.scissor_test[0]; | ||
| 493 | preserve_contents |= scissor.min_x > 0; | ||
| 494 | preserve_contents |= scissor.min_y > 0; | ||
| 495 | preserve_contents |= scissor.max_x < regs.zeta_width; | ||
| 496 | preserve_contents |= scissor.max_y < regs.zeta_height; | ||
| 497 | } | ||
| 498 | |||
| 499 | depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); | ||
| 500 | texture_cache.MarkDepthBufferInUse(); | ||
| 501 | } | ||
| 502 | texture_cache.GuardRenderTargets(false); | ||
| 503 | |||
| 504 | FramebufferCacheKey key; | ||
| 505 | key.colors[0] = std::move(color_surface); | ||
| 506 | key.zeta = std::move(depth_surface); | ||
| 507 | |||
| 508 | state_tracker.NotifyFramebuffer(); | ||
| 509 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); | ||
| 510 | } | ||
| 511 | |||
| 512 | void RasterizerOpenGL::Clear() { | 480 | void RasterizerOpenGL::Clear() { |
| 513 | if (!maxwell3d.ShouldExecute()) { | 481 | if (!maxwell3d.ShouldExecute()) { |
| 514 | return; | 482 | return; |
| @@ -523,8 +491,9 @@ void RasterizerOpenGL::Clear() { | |||
| 523 | regs.clear_buffers.A) { | 491 | regs.clear_buffers.A) { |
| 524 | use_color = true; | 492 | use_color = true; |
| 525 | 493 | ||
| 526 | state_tracker.NotifyColorMask0(); | 494 | const GLuint index = regs.clear_buffers.RT; |
| 527 | glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, | 495 | state_tracker.NotifyColorMask(index); |
| 496 | glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, | ||
| 528 | regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); | 497 | regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); |
| 529 | 498 | ||
| 530 | // TODO(Rodrigo): Determine if clamping is used on clears | 499 | // TODO(Rodrigo): Determine if clamping is used on clears |
| @@ -557,15 +526,17 @@ void RasterizerOpenGL::Clear() { | |||
| 557 | state_tracker.NotifyScissor0(); | 526 | state_tracker.NotifyScissor0(); |
| 558 | glDisablei(GL_SCISSOR_TEST, 0); | 527 | glDisablei(GL_SCISSOR_TEST, 0); |
| 559 | } | 528 | } |
| 560 | |||
| 561 | UNIMPLEMENTED_IF(regs.clear_flags.viewport); | 529 | UNIMPLEMENTED_IF(regs.clear_flags.viewport); |
| 562 | 530 | ||
| 563 | ConfigureClearFramebuffer(use_color, use_depth || use_stencil); | 531 | { |
| 532 | auto lock = texture_cache.AcquireLock(); | ||
| 533 | texture_cache.UpdateRenderTargets(true); | ||
| 534 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||
| 535 | } | ||
| 564 | 536 | ||
| 565 | if (use_color) { | 537 | if (use_color) { |
| 566 | glClearBufferfv(GL_COLOR, 0, regs.clear_color); | 538 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); |
| 567 | } | 539 | } |
| 568 | |||
| 569 | if (use_depth && use_stencil) { | 540 | if (use_depth && use_stencil) { |
| 570 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); | 541 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); |
| 571 | } else if (use_depth) { | 542 | } else if (use_depth) { |
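The clear path above now passes `regs.clear_buffers.RT` to glClearBufferfv instead of hardcoding draw buffer 0, matching the glColorMaski change earlier in the hunk. A sketch of the call, assuming a current GL context and a bound draw framebuffer; the values are illustrative:

```cpp
#include <glad/glad.h>

// The second argument to glClearBufferfv selects the draw buffer, so clears
// honor the RT index from clear_buffers rather than always hitting attachment 0.
void ClearColorTarget(GLint draw_buffer) {
    const GLfloat color[4] = {0.0f, 0.0f, 0.0f, 1.0f}; // example clear color
    glClearBufferfv(GL_COLOR, draw_buffer, color);
}
```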
| @@ -622,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 622 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | 593 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); |
| 623 | 594 | ||
| 624 | // Prepare the vertex array. | 595 | // Prepare the vertex array. |
| 625 | const bool invalidated = buffer_cache.Map(buffer_size); | 596 | buffer_cache.Map(buffer_size); |
| 626 | |||
| 627 | if (invalidated) { | ||
| 628 | // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty | ||
| 629 | auto& dirty = maxwell3d.dirty.flags; | ||
| 630 | dirty[Dirty::VertexBuffers] = true; | ||
| 631 | for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { | ||
| 632 | dirty[index] = true; | ||
| 633 | } | ||
| 634 | } | ||
| 635 | 597 | ||
| 636 | // Prepare vertex array format. | 598 | // Prepare vertex array format. |
| 637 | SetupVertexFormat(); | 599 | SetupVertexFormat(); |
| @@ -655,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 655 | } | 617 | } |
| 656 | 618 | ||
| 657 | // Setup shaders and their used resources. | 619 | // Setup shaders and their used resources. |
| 658 | texture_cache.GuardSamplers(true); | 620 | auto lock = texture_cache.AcquireLock(); |
| 659 | const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); | 621 | SetupShaders(); |
| 660 | SetupShaders(primitive_mode); | ||
| 661 | texture_cache.GuardSamplers(false); | ||
| 662 | |||
| 663 | ConfigureFramebuffers(); | ||
| 664 | 622 | ||
| 665 | // Signal the buffer cache that we are not going to upload more things. | 623 | // Signal the buffer cache that we are not going to upload more things. |
| 666 | buffer_cache.Unmap(); | 624 | buffer_cache.Unmap(); |
| 667 | 625 | texture_cache.UpdateRenderTargets(false); | |
| 626 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||
| 668 | program_manager.BindGraphicsPipeline(); | 627 | program_manager.BindGraphicsPipeline(); |
| 669 | 628 | ||
| 670 | if (texture_cache.TextureBarrier()) { | 629 | const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); |
| 671 | glTextureBarrier(); | ||
| 672 | } | ||
| 673 | |||
| 674 | BeginTransformFeedback(primitive_mode); | 630 | BeginTransformFeedback(primitive_mode); |
| 675 | 631 | ||
| 676 | const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); | 632 | const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); |
| @@ -722,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 722 | buffer_cache.Acquire(); | 678 | buffer_cache.Acquire(); |
| 723 | current_cbuf = 0; | 679 | current_cbuf = 0; |
| 724 | 680 | ||
| 725 | auto kernel = shader_cache.GetComputeKernel(code_addr); | 681 | Shader* const kernel = shader_cache.GetComputeKernel(code_addr); |
| 726 | program_manager.BindCompute(kernel->GetHandle()); | ||
| 727 | 682 | ||
| 728 | SetupComputeTextures(kernel); | 683 | auto lock = texture_cache.AcquireLock(); |
| 729 | SetupComputeImages(kernel); | 684 | BindComputeTextures(kernel); |
| 730 | 685 | ||
| 731 | const std::size_t buffer_size = | 686 | const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * |
| 732 | Tegra::Engines::KeplerCompute::NumConstBuffers * | 687 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); |
| 733 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 734 | buffer_cache.Map(buffer_size); | 688 | buffer_cache.Map(buffer_size); |
| 735 | 689 | ||
| 736 | SetupComputeConstBuffers(kernel); | 690 | SetupComputeConstBuffers(kernel); |
| @@ -739,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 739 | buffer_cache.Unmap(); | 693 | buffer_cache.Unmap(); |
| 740 | 694 | ||
| 741 | const auto& launch_desc = kepler_compute.launch_description; | 695 | const auto& launch_desc = kepler_compute.launch_description; |
| 742 | program_manager.BindCompute(kernel->GetHandle()); | ||
| 743 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | 696 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); |
| 744 | ++num_queued_commands; | 697 | ++num_queued_commands; |
| 745 | } | 698 | } |
| @@ -760,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | |||
| 760 | if (addr == 0 || size == 0) { | 713 | if (addr == 0 || size == 0) { |
| 761 | return; | 714 | return; |
| 762 | } | 715 | } |
| 763 | texture_cache.FlushRegion(addr, size); | 716 | { |
| 717 | auto lock = texture_cache.AcquireLock(); | ||
| 718 | texture_cache.DownloadMemory(addr, size); | ||
| 719 | } | ||
| 764 | buffer_cache.FlushRegion(addr, size); | 720 | buffer_cache.FlushRegion(addr, size); |
| 765 | query_cache.FlushRegion(addr, size); | 721 | query_cache.FlushRegion(addr, size); |
| 766 | } | 722 | } |
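FlushRegion introduces a pattern repeated through the rest of the file: a braced scope around the texture-cache work. A sketch of the idiom, assuming AcquireLock returns an RAII guard over the cache's internal mutex:

```cpp
// The extra braces drop the guard before the buffer and query caches run,
// keeping the texture cache's critical section as small as possible.
{
    auto lock = texture_cache.AcquireLock();
    texture_cache.DownloadMemory(addr, size);
} // guard destroyed here
buffer_cache.FlushRegion(addr, size);
query_cache.FlushRegion(addr, size);
```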
| @@ -769,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { | |||
| 769 | if (!Settings::IsGPULevelHigh()) { | 725 | if (!Settings::IsGPULevelHigh()) { |
| 770 | return buffer_cache.MustFlushRegion(addr, size); | 726 | return buffer_cache.MustFlushRegion(addr, size); |
| 771 | } | 727 | } |
| 772 | return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); | 728 | return texture_cache.IsRegionGpuModified(addr, size) || |
| 729 | buffer_cache.MustFlushRegion(addr, size); | ||
| 773 | } | 730 | } |
| 774 | 731 | ||
| 775 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | 732 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { |
| @@ -777,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | |||
| 777 | if (addr == 0 || size == 0) { | 734 | if (addr == 0 || size == 0) { |
| 778 | return; | 735 | return; |
| 779 | } | 736 | } |
| 780 | texture_cache.InvalidateRegion(addr, size); | 737 | { |
| 738 | auto lock = texture_cache.AcquireLock(); | ||
| 739 | texture_cache.WriteMemory(addr, size); | ||
| 740 | } | ||
| 781 | shader_cache.InvalidateRegion(addr, size); | 741 | shader_cache.InvalidateRegion(addr, size); |
| 782 | buffer_cache.InvalidateRegion(addr, size); | 742 | buffer_cache.InvalidateRegion(addr, size); |
| 783 | query_cache.InvalidateRegion(addr, size); | 743 | query_cache.InvalidateRegion(addr, size); |
| @@ -788,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
| 788 | if (addr == 0 || size == 0) { | 748 | if (addr == 0 || size == 0) { |
| 789 | return; | 749 | return; |
| 790 | } | 750 | } |
| 791 | texture_cache.OnCPUWrite(addr, size); | 751 | { |
| 752 | auto lock = texture_cache.AcquireLock(); | ||
| 753 | texture_cache.WriteMemory(addr, size); | ||
| 754 | } | ||
| 792 | shader_cache.OnCPUWrite(addr, size); | 755 | shader_cache.OnCPUWrite(addr, size); |
| 793 | buffer_cache.OnCPUWrite(addr, size); | 756 | buffer_cache.OnCPUWrite(addr, size); |
| 794 | } | 757 | } |
| 795 | 758 | ||
| 796 | void RasterizerOpenGL::SyncGuestHost() { | 759 | void RasterizerOpenGL::SyncGuestHost() { |
| 797 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 760 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 798 | texture_cache.SyncGuestHost(); | ||
| 799 | buffer_cache.SyncGuestHost(); | 761 | buffer_cache.SyncGuestHost(); |
| 800 | shader_cache.SyncGuestHost(); | 762 | shader_cache.SyncGuestHost(); |
| 801 | } | 763 | } |
| 802 | 764 | ||
| 765 | void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | ||
| 766 | { | ||
| 767 | auto lock = texture_cache.AcquireLock(); | ||
| 768 | texture_cache.UnmapMemory(addr, size); | ||
| 769 | } | ||
| 770 | buffer_cache.OnCPUWrite(addr, size); | ||
| 771 | shader_cache.OnCPUWrite(addr, size); | ||
| 772 | } | ||
| 773 | |||
| 803 | void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { | 774 | void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { |
| 804 | if (!gpu.IsAsync()) { | 775 | if (!gpu.IsAsync()) { |
| 805 | gpu_memory.Write<u32>(addr, value); | 776 | gpu_memory.Write<u32>(addr, value); |
| @@ -841,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() { | |||
| 841 | GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); | 812 | GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); |
| 842 | } | 813 | } |
| 843 | 814 | ||
| 815 | void RasterizerOpenGL::FragmentBarrier() { | ||
| 816 | glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); | ||
| 817 | } | ||
| 818 | |||
| 819 | void RasterizerOpenGL::TiledCacheBarrier() { | ||
| 820 | glTextureBarrier(); | ||
| 821 | } | ||
| 822 | |||
| 844 | void RasterizerOpenGL::FlushCommands() { | 823 | void RasterizerOpenGL::FlushCommands() { |
| 845 | // Only flush when we have commands queued to OpenGL. | 824 | // Only flush when we have commands queued to OpenGL. |
| 846 | if (num_queued_commands == 0) { | 825 | if (num_queued_commands == 0) { |
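The two new barrier overrides wrap real GL calls: GL_FRAMEBUFFER_BARRIER_BIT orders framebuffer writes against later reads, and glTextureBarrier (GL 4.5) makes it legal to sample a texture that was just rendered to within the same framebuffer. A sketch of possible call sites, where `rasterizer` is an assumed reference to this class:

```cpp
// After rendering into an image that a later pass samples:
rasterizer.FragmentBarrier();   // glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT)
// Between draws that sample a texture also attached to the framebuffer:
rasterizer.TiledCacheBarrier(); // glTextureBarrier()
```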
| @@ -854,45 +833,95 @@ void RasterizerOpenGL::TickFrame() { | |||
| 854 | // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. | 833 | // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. |
| 855 | num_queued_commands = 0; | 834 | num_queued_commands = 0; |
| 856 | 835 | ||
| 836 | fence_manager.TickFrame(); | ||
| 857 | buffer_cache.TickFrame(); | 837 | buffer_cache.TickFrame(); |
| 838 | { | ||
| 839 | auto lock = texture_cache.AcquireLock(); | ||
| 840 | texture_cache.TickFrame(); | ||
| 841 | } | ||
| 858 | } | 842 | } |
| 859 | 843 | ||
| 860 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 844 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 861 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 845 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 862 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 846 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 863 | MICROPROFILE_SCOPE(OpenGL_Blits); | 847 | MICROPROFILE_SCOPE(OpenGL_Blits); |
| 864 | texture_cache.DoFermiCopy(src, dst, copy_config); | 848 | auto lock = texture_cache.AcquireLock(); |
| 849 | texture_cache.BlitImage(dst, src, copy_config); | ||
| 865 | return true; | 850 | return true; |
| 866 | } | 851 | } |
| 867 | 852 | ||
| 868 | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | 853 | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, |
| 869 | VAddr framebuffer_addr, u32 pixel_stride) { | 854 | VAddr framebuffer_addr, u32 pixel_stride) { |
| 870 | if (!framebuffer_addr) { | 855 | if (framebuffer_addr == 0) { |
| 871 | return {}; | 856 | return false; |
| 872 | } | 857 | } |
| 873 | |||
| 874 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 858 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 875 | 859 | ||
| 876 | const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; | 860 | auto lock = texture_cache.AcquireLock(); |
| 877 | if (!surface) { | 861 | ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; |
| 878 | return {}; | 862 | if (!image_view) { |
| 863 | return false; | ||
| 879 | } | 864 | } |
| 880 | |||
| 881 | // Verify that the cached surface is the same size and format as the requested framebuffer | 865 | // Verify that the cached surface is the same size and format as the requested framebuffer |
| 882 | const auto& params{surface->GetSurfaceParams()}; | 866 | // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); |
| 883 | const auto& pixel_format{ | 867 | // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); |
| 884 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; | ||
| 885 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); | ||
| 886 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); | ||
| 887 | 868 | ||
| 888 | if (params.pixel_format != pixel_format) { | 869 | screen_info.display_texture = image_view->Handle(ImageViewType::e2D); |
| 889 | LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different"); | 870 | screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); |
| 890 | } | 871 | return true; |
| 872 | } | ||
| 891 | 873 | ||
| 892 | screen_info.display_texture = surface->GetTexture(); | 874 | void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { |
| 893 | screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion; | 875 | image_view_indices.clear(); |
| 876 | sampler_handles.clear(); | ||
| 894 | 877 | ||
| 895 | return true; | 878 | texture_cache.SynchronizeComputeDescriptors(); |
| 879 | |||
| 880 | SetupComputeTextures(kernel); | ||
| 881 | SetupComputeImages(kernel); | ||
| 882 | |||
| 883 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 884 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 885 | |||
| 886 | program_manager.BindCompute(kernel->GetHandle()); | ||
| 887 | size_t image_view_index = 0; | ||
| 888 | size_t texture_index = 0; | ||
| 889 | size_t image_index = 0; | ||
| 890 | BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); | ||
| 891 | } | ||
| 892 | |||
| 893 | void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, | ||
| 894 | GLuint base_image, size_t& image_view_index, | ||
| 895 | size_t& texture_index, size_t& image_index) { | ||
| 896 | const GLuint* const samplers = sampler_handles.data() + texture_index; | ||
| 897 | const GLuint* const textures = texture_handles.data() + texture_index; | ||
| 898 | const GLuint* const images = image_handles.data() + image_index; | ||
| 899 | |||
| 900 | const size_t num_samplers = entries.samplers.size(); | ||
| 901 | for (const auto& sampler : entries.samplers) { | ||
| 902 | for (size_t i = 0; i < sampler.size; ++i) { | ||
| 903 | const ImageViewId image_view_id = image_view_ids[image_view_index++]; | ||
| 904 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 905 | const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); | ||
| 906 | texture_handles[texture_index++] = handle; | ||
| 907 | } | ||
| 908 | } | ||
| 909 | const size_t num_images = entries.images.size(); | ||
| 910 | for (size_t unit = 0; unit < num_images; ++unit) { | ||
| 911 | // TODO: Mark as modified | ||
| 912 | const ImageViewId image_view_id = image_view_ids[image_view_index++]; | ||
| 913 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 914 | const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); | ||
| 915 | image_handles[image_index] = handle; | ||
| 916 | ++image_index; | ||
| 917 | } | ||
| 918 | if (num_samplers > 0) { | ||
| 919 | glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers); | ||
| 920 | glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures); | ||
| 921 | } | ||
| 922 | if (num_images > 0) { | ||
| 923 | glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images); | ||
| 924 | } | ||
| 896 | } | 925 | } |
| 897 | 926 | ||
| 898 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { | 927 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { |
| @@ -999,7 +1028,6 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh | |||
| 999 | GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, | 1028 | GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, |
| 1000 | GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, | 1029 | GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, |
| 1001 | }; | 1030 | }; |
| 1002 | |||
| 1003 | const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; | 1031 | const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; |
| 1004 | const auto& entries{shader->GetEntries().global_memory_entries}; | 1032 | const auto& entries{shader->GetEntries().global_memory_entries}; |
| 1005 | 1033 | ||
| @@ -1056,77 +1084,53 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e | |||
| 1056 | } | 1084 | } |
| 1057 | } | 1085 | } |
| 1058 | 1086 | ||
| 1059 | void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { | 1087 | void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { |
| 1060 | MICROPROFILE_SCOPE(OpenGL_Texture); | 1088 | const bool via_header_index = |
| 1061 | u32 binding = device.GetBaseBindings(stage_index).sampler; | 1089 | maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1062 | for (const auto& entry : shader->GetEntries().samplers) { | 1090 | for (const auto& entry : shader->GetEntries().samplers) { |
| 1063 | const auto shader_type = static_cast<ShaderType>(stage_index); | 1091 | const auto shader_type = static_cast<ShaderType>(stage_index); |
| 1064 | for (std::size_t i = 0; i < entry.size; ++i) { | 1092 | for (size_t index = 0; index < entry.size; ++index) { |
| 1065 | const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); | 1093 | const auto handle = |
| 1066 | SetupTexture(binding++, texture, entry); | 1094 | GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); |
| 1095 | const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); | ||
| 1096 | sampler_handles.push_back(sampler->Handle()); | ||
| 1097 | image_view_indices.push_back(handle.image); | ||
| 1067 | } | 1098 | } |
| 1068 | } | 1099 | } |
| 1069 | } | 1100 | } |
| 1070 | 1101 | ||
| 1071 | void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { | 1102 | void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { |
| 1072 | MICROPROFILE_SCOPE(OpenGL_Texture); | 1103 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1073 | u32 binding = 0; | ||
| 1074 | for (const auto& entry : kernel->GetEntries().samplers) { | 1104 | for (const auto& entry : kernel->GetEntries().samplers) { |
| 1075 | for (std::size_t i = 0; i < entry.size; ++i) { | 1105 | for (size_t i = 0; i < entry.size; ++i) { |
| 1076 | const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i); | 1106 | const auto handle = |
| 1077 | SetupTexture(binding++, texture, entry); | 1107 | GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); |
| 1108 | const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | ||
| 1109 | sampler_handles.push_back(sampler->Handle()); | ||
| 1110 | image_view_indices.push_back(handle.image); | ||
| 1078 | } | 1111 | } |
| 1079 | } | 1112 | } |
| 1080 | } | 1113 | } |
| 1081 | 1114 | ||
| 1082 | void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | 1115 | void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { |
| 1083 | const SamplerEntry& entry) { | 1116 | const bool via_header_index = |
| 1084 | const auto view = texture_cache.GetTextureSurface(texture.tic, entry); | 1117 | maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1085 | if (!view) { | ||
| 1086 | // Can occur when texture addr is null or its memory is unmapped/invalid | ||
| 1087 | glBindSampler(binding, 0); | ||
| 1088 | glBindTextureUnit(binding, 0); | ||
| 1089 | return; | ||
| 1090 | } | ||
| 1091 | const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source, | ||
| 1092 | texture.tic.z_source, texture.tic.w_source); | ||
| 1093 | glBindTextureUnit(binding, handle); | ||
| 1094 | if (!view->GetSurfaceParams().IsBuffer()) { | ||
| 1095 | glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); | ||
| 1096 | } | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) { | ||
| 1100 | u32 binding = device.GetBaseBindings(stage_index).image; | ||
| 1101 | for (const auto& entry : shader->GetEntries().images) { | 1118 | for (const auto& entry : shader->GetEntries().images) { |
| 1102 | const auto shader_type = static_cast<ShaderType>(stage_index); | 1119 | const auto shader_type = static_cast<ShaderType>(stage_index); |
| 1103 | const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; | 1120 | const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); |
| 1104 | SetupImage(binding++, tic, entry); | 1121 | image_view_indices.push_back(handle.image); |
| 1105 | } | 1122 | } |
| 1106 | } | 1123 | } |
| 1107 | 1124 | ||
| 1108 | void RasterizerOpenGL::SetupComputeImages(Shader* shader) { | 1125 | void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { |
| 1109 | u32 binding = 0; | 1126 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1110 | for (const auto& entry : shader->GetEntries().images) { | 1127 | for (const auto& entry : shader->GetEntries().images) { |
| 1111 | const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic; | 1128 | const auto handle = |
| 1112 | SetupImage(binding++, tic, entry); | 1129 | GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); |
| 1130 | image_view_indices.push_back(handle.image); | ||
| 1113 | } | 1131 | } |
| 1114 | } | 1132 | } |
| 1115 | 1133 | ||
| 1116 | void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, | ||
| 1117 | const ImageEntry& entry) { | ||
| 1118 | const auto view = texture_cache.GetImageSurface(tic, entry); | ||
| 1119 | if (!view) { | ||
| 1120 | glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); | ||
| 1121 | return; | ||
| 1122 | } | ||
| 1123 | if (entry.is_written) { | ||
| 1124 | view->MarkAsModified(texture_cache.Tick()); | ||
| 1125 | } | ||
| 1126 | const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | ||
| 1127 | glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat()); | ||
| 1128 | } | ||
| 1129 | |||
| 1130 | void RasterizerOpenGL::SyncViewport() { | 1134 | void RasterizerOpenGL::SyncViewport() { |
| 1131 | auto& flags = maxwell3d.dirty.flags; | 1135 | auto& flags = maxwell3d.dirty.flags; |
| 1132 | const auto& regs = maxwell3d.regs; | 1136 | const auto& regs = maxwell3d.regs; |
| @@ -1526,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() { | |||
| 1526 | flags[Dirty::PointSize] = false; | 1530 | flags[Dirty::PointSize] = false; |
| 1527 | 1531 | ||
| 1528 | oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); | 1532 | oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); |
| 1533 | oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); | ||
| 1529 | 1534 | ||
| 1530 | if (maxwell3d.regs.vp_point_size.enable) { | ||
| 1531 | // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled. | ||
| 1532 | glEnable(GL_PROGRAM_POINT_SIZE); | ||
| 1533 | return; | ||
| 1534 | } | ||
| 1535 | |||
| 1536 | // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid | ||
| 1537 | // in OpenGL). | ||
| 1538 | glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); | 1535 | glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); |
| 1539 | glDisable(GL_PROGRAM_POINT_SIZE); | ||
| 1540 | } | 1536 | } |
| 1541 | 1537 | ||
| 1542 | void RasterizerOpenGL::SyncLineState() { | 1538 | void RasterizerOpenGL::SyncLineState() { |
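The SyncPointState simplification above relies on GL semantics: glPointSize only takes effect while GL_PROGRAM_POINT_SIZE is disabled, so both can be set unconditionally without the old branch. A sketch of the `oglEnable` helper the new code assumes, which is presumed to be a thin glEnable/glDisable wrapper:

```cpp
#include <glad/glad.h>

// Assumed helper: toggles a capability from a boolean guest register.
static void oglEnable(GLenum cap, bool state) {
    (state ? glEnable : glDisable)(cap);
}
```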
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index de28cff15..82e03e677 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -7,12 +7,13 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <atomic> | 8 | #include <atomic> |
| 9 | #include <cstddef> | 9 | #include <cstddef> |
| 10 | #include <map> | ||
| 11 | #include <memory> | 10 | #include <memory> |
| 12 | #include <optional> | 11 | #include <optional> |
| 13 | #include <tuple> | 12 | #include <tuple> |
| 14 | #include <utility> | 13 | #include <utility> |
| 15 | 14 | ||
| 15 | #include <boost/container/static_vector.hpp> | ||
| 16 | |||
| 16 | #include <glad/glad.h> | 17 | #include <glad/glad.h> |
| 17 | 18 | ||
| 18 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| @@ -23,16 +24,14 @@ | |||
| 23 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 24 | #include "video_core/renderer_opengl/gl_device.h" | 25 | #include "video_core/renderer_opengl/gl_device.h" |
| 25 | #include "video_core/renderer_opengl/gl_fence_manager.h" | 26 | #include "video_core/renderer_opengl/gl_fence_manager.h" |
| 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | ||
| 27 | #include "video_core/renderer_opengl/gl_query_cache.h" | 27 | #include "video_core/renderer_opengl/gl_query_cache.h" |
| 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 29 | #include "video_core/renderer_opengl/gl_sampler_cache.h" | ||
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 29 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 31 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 30 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 32 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 31 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 33 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 32 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 33 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | ||
| 34 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 34 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 35 | #include "video_core/renderer_opengl/utils.h" | ||
| 36 | #include "video_core/shader/async_shaders.h" | 35 | #include "video_core/shader/async_shaders.h" |
| 37 | #include "video_core/textures/texture.h" | 36 | #include "video_core/textures/texture.h" |
| 38 | 37 | ||
| @@ -51,7 +50,7 @@ class MemoryManager; | |||
| 51 | namespace OpenGL { | 50 | namespace OpenGL { |
| 52 | 51 | ||
| 53 | struct ScreenInfo; | 52 | struct ScreenInfo; |
| 54 | struct DrawParameters; | 53 | struct ShaderEntries; |
| 55 | 54 | ||
| 56 | struct BindlessSSBO { | 55 | struct BindlessSSBO { |
| 57 | GLuint64EXT address; | 56 | GLuint64EXT address; |
| @@ -79,15 +78,18 @@ public: | |||
| 79 | void InvalidateRegion(VAddr addr, u64 size) override; | 78 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 80 | void OnCPUWrite(VAddr addr, u64 size) override; | 79 | void OnCPUWrite(VAddr addr, u64 size) override; |
| 81 | void SyncGuestHost() override; | 80 | void SyncGuestHost() override; |
| 81 | void UnmapMemory(VAddr addr, u64 size) override; | ||
| 82 | void SignalSemaphore(GPUVAddr addr, u32 value) override; | 82 | void SignalSemaphore(GPUVAddr addr, u32 value) override; |
| 83 | void SignalSyncPoint(u32 value) override; | 83 | void SignalSyncPoint(u32 value) override; |
| 84 | void ReleaseFences() override; | 84 | void ReleaseFences() override; |
| 85 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 85 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 86 | void WaitForIdle() override; | 86 | void WaitForIdle() override; |
| 87 | void FragmentBarrier() override; | ||
| 88 | void TiledCacheBarrier() override; | ||
| 87 | void FlushCommands() override; | 89 | void FlushCommands() override; |
| 88 | void TickFrame() override; | 90 | void TickFrame() override; |
| 89 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 91 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 90 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 92 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 91 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 93 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| 92 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 94 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 93 | u32 pixel_stride) override; | 95 | u32 pixel_stride) override; |
| @@ -108,11 +110,14 @@ public: | |||
| 108 | } | 110 | } |
| 109 | 111 | ||
| 110 | private: | 112 | private: |
| 111 | /// Configures the color and depth framebuffer states. | 113 | static constexpr size_t MAX_TEXTURES = 192; |
| 112 | void ConfigureFramebuffers(); | 114 | static constexpr size_t MAX_IMAGES = 48; |
| 115 | static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; | ||
| 116 | |||
| 117 | void BindComputeTextures(Shader* kernel); | ||
| 113 | 118 | ||
| 114 | /// Configures the color and depth framebuffer for clearing. | 119 | void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, |
| 115 | void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); | 120 | size_t& image_view_index, size_t& texture_index, size_t& image_index); |
| 116 | 121 | ||
| 117 | /// Configures the current constbuffers to use for the draw command. | 122 | /// Configures the current constbuffers to use for the draw command. |
| 118 | void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); | 123 | void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); |
| @@ -136,23 +141,16 @@ private: | |||
| 136 | size_t size, BindlessSSBO* ssbo); | 141 | size_t size, BindlessSSBO* ssbo); |
| 137 | 142 | ||
| 138 | /// Configures the current textures to use for the draw command. | 143 | /// Configures the current textures to use for the draw command. |
| 139 | void SetupDrawTextures(std::size_t stage_index, Shader* shader); | 144 | void SetupDrawTextures(const Shader* shader, size_t stage_index); |
| 140 | 145 | ||
| 141 | /// Configures the textures used in a compute shader. | 146 | /// Configures the textures used in a compute shader. |
| 142 | void SetupComputeTextures(Shader* kernel); | 147 | void SetupComputeTextures(const Shader* kernel); |
| 143 | |||
| 144 | /// Configures a texture. | ||
| 145 | void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | ||
| 146 | const SamplerEntry& entry); | ||
| 147 | 148 | ||
| 148 | /// Configures images in a graphics shader. | 149 | /// Configures images in a graphics shader. |
| 149 | void SetupDrawImages(std::size_t stage_index, Shader* shader); | 150 | void SetupDrawImages(const Shader* shader, size_t stage_index); |
| 150 | 151 | ||
| 151 | /// Configures images in a compute shader. | 152 | /// Configures images in a compute shader. |
| 152 | void SetupComputeImages(Shader* shader); | 153 | void SetupComputeImages(const Shader* shader); |
| 153 | |||
| 154 | /// Configures an image. | ||
| 155 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); | ||
| 156 | 154 | ||
| 157 | /// Syncs the viewport and depth range to match the guest state | 155 | /// Syncs the viewport and depth range to match the guest state |
| 158 | void SyncViewport(); | 156 | void SyncViewport(); |
| @@ -227,9 +225,6 @@ private: | |||
| 227 | /// End a transform feedback | 225 | /// End a transform feedback |
| 228 | void EndTransformFeedback(); | 226 | void EndTransformFeedback(); |
| 229 | 227 | ||
| 230 | /// Check for extension that are not strictly required but are needed for correct emulation | ||
| 231 | void CheckExtensions(); | ||
| 232 | |||
| 233 | std::size_t CalculateVertexArraysSize() const; | 228 | std::size_t CalculateVertexArraysSize() const; |
| 234 | 229 | ||
| 235 | std::size_t CalculateIndexBufferSize() const; | 230 | std::size_t CalculateIndexBufferSize() const; |
| @@ -242,7 +237,7 @@ private: | |||
| 242 | 237 | ||
| 243 | GLintptr SetupIndexBuffer(); | 238 | GLintptr SetupIndexBuffer(); |
| 244 | 239 | ||
| 245 | void SetupShaders(GLenum primitive_mode); | 240 | void SetupShaders(); |
| 246 | 241 | ||
| 247 | Tegra::GPU& gpu; | 242 | Tegra::GPU& gpu; |
| 248 | Tegra::Engines::Maxwell3D& maxwell3d; | 243 | Tegra::Engines::Maxwell3D& maxwell3d; |
| @@ -254,19 +249,21 @@ private: | |||
| 254 | ProgramManager& program_manager; | 249 | ProgramManager& program_manager; |
| 255 | StateTracker& state_tracker; | 250 | StateTracker& state_tracker; |
| 256 | 251 | ||
| 257 | TextureCacheOpenGL texture_cache; | 252 | OGLStreamBuffer stream_buffer; |
| 253 | TextureCacheRuntime texture_cache_runtime; | ||
| 254 | TextureCache texture_cache; | ||
| 258 | ShaderCacheOpenGL shader_cache; | 255 | ShaderCacheOpenGL shader_cache; |
| 259 | SamplerCacheOpenGL sampler_cache; | ||
| 260 | FramebufferCacheOpenGL framebuffer_cache; | ||
| 261 | QueryCache query_cache; | 256 | QueryCache query_cache; |
| 262 | OGLBufferCache buffer_cache; | 257 | OGLBufferCache buffer_cache; |
| 263 | FenceManagerOpenGL fence_manager; | 258 | FenceManagerOpenGL fence_manager; |
| 264 | 259 | ||
| 265 | VideoCommon::Shader::AsyncShaders async_shaders; | 260 | VideoCommon::Shader::AsyncShaders async_shaders; |
| 266 | 261 | ||
| 267 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 262 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |
| 268 | 263 | std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; | |
| 269 | GLint vertex_binding = 0; | 264 | boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; |
| 265 | std::array<GLuint, MAX_TEXTURES> texture_handles; | ||
| 266 | std::array<GLuint, MAX_IMAGES> image_handles; | ||
| 270 | 267 | ||
| 271 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | 268 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> |
| 272 | transform_feedback_buffers; | 269 | transform_feedback_buffers; |
| @@ -280,7 +277,7 @@ private: | |||
| 280 | std::size_t current_cbuf = 0; | 277 | std::size_t current_cbuf = 0; |
| 281 | OGLBuffer unified_uniform_buffer; | 278 | OGLBuffer unified_uniform_buffer; |
| 282 | 279 | ||
| 283 | /// Number of commands queued to the OpenGL driver. Reseted on flush. | 280 | /// Number of commands queued to the OpenGL driver. Reset on flush. |
| 284 | std::size_t num_queued_commands = 0; | 281 | std::size_t num_queued_commands = 0; |
| 285 | 282 | ||
| 286 | u32 last_clip_distance_mask = 0; | 283 | u32 last_clip_distance_mask = 0; |
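The header swaps heap-backed containers for boost::container::static_vector in the per-draw gather state. A sketch of what that buys, with the capacity mirroring MAX_TEXTURES + MAX_IMAGES from the diff:

```cpp
#include <cstdint>
#include <boost/container/static_vector.hpp>

// static_vector keeps its elements in fixed inline storage, so gathering up
// to 240 view indices per draw never touches the heap.
boost::container::static_vector<std::uint32_t, 240> gathered;

void Gather(std::uint32_t index) {
    gathered.push_back(index); // throws std::bad_alloc if capacity is exceeded
}
```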
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 0ebcec427..0e34a0f20 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp | |||
| @@ -71,7 +71,7 @@ void OGLSampler::Create() { | |||
| 71 | return; | 71 | return; |
| 72 | 72 | ||
| 73 | MICROPROFILE_SCOPE(OpenGL_ResourceCreation); | 73 | MICROPROFILE_SCOPE(OpenGL_ResourceCreation); |
| 74 | glGenSamplers(1, &handle); | 74 | glCreateSamplers(1, &handle); |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | void OGLSampler::Release() { | 77 | void OGLSampler::Release() { |
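The glGenSamplers to glCreateSamplers switch follows the direct state access style used elsewhere in this renderer (compare glNamedBufferStorage above): the GL 4.5 call creates a fully initialized object in one step, so parameters can be set without binding first. A minimal sketch:

```cpp
#include <glad/glad.h>

// DSA creation: the sampler is usable immediately after glCreateSamplers.
GLuint MakeLinearSampler() {
    GLuint sampler = 0;
    glCreateSamplers(1, &sampler);
    glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    return sampler;
}
```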
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp deleted file mode 100644 index 5c174879a..000000000 --- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp +++ /dev/null | |||
| @@ -1,52 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 7 | #include "video_core/renderer_opengl/gl_sampler_cache.h" | ||
| 8 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | ||
| 9 | |||
| 10 | namespace OpenGL { | ||
| 11 | |||
| 12 | SamplerCacheOpenGL::SamplerCacheOpenGL() = default; | ||
| 13 | |||
| 14 | SamplerCacheOpenGL::~SamplerCacheOpenGL() = default; | ||
| 15 | |||
| 16 | OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { | ||
| 17 | OGLSampler sampler; | ||
| 18 | sampler.Create(); | ||
| 19 | |||
| 20 | const GLuint sampler_id{sampler.handle}; | ||
| 21 | glSamplerParameteri( | ||
| 22 | sampler_id, GL_TEXTURE_MAG_FILTER, | ||
| 23 | MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None)); | ||
| 24 | glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, | ||
| 25 | MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter)); | ||
| 26 | glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u)); | ||
| 27 | glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v)); | ||
| 28 | glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p)); | ||
| 29 | glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, | ||
| 30 | tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE); | ||
| 31 | glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, | ||
| 32 | MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func)); | ||
| 33 | glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data()); | ||
| 34 | glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod()); | ||
| 35 | glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod()); | ||
| 36 | glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias()); | ||
| 37 | if (GLAD_GL_ARB_texture_filter_anisotropic) { | ||
| 38 | glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); | ||
| 39 | } else if (GLAD_GL_EXT_texture_filter_anisotropic) { | ||
| 40 | glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); | ||
| 41 | } else { | ||
| 42 | LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); | ||
| 43 | } | ||
| 44 | |||
| 45 | return sampler; | ||
| 46 | } | ||
| 47 | |||
| 48 | GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const { | ||
| 49 | return sampler.handle; | ||
| 50 | } | ||
| 51 | |||
| 52 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h deleted file mode 100644 index 34ee37f00..000000000 --- a/src/video_core/renderer_opengl/gl_sampler_cache.h +++ /dev/null | |||
| @@ -1,25 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <glad/glad.h> | ||
| 8 | |||
| 9 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 10 | #include "video_core/sampler_cache.h" | ||
| 11 | |||
| 12 | namespace OpenGL { | ||
| 13 | |||
| 14 | class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> { | ||
| 15 | public: | ||
| 16 | explicit SamplerCacheOpenGL(); | ||
| 17 | ~SamplerCacheOpenGL(); | ||
| 18 | |||
| 19 | protected: | ||
| 20 | OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; | ||
| 21 | |||
| 22 | GLuint ToSamplerType(const OGLSampler& sampler) const override; | ||
| 23 | }; | ||
| 24 | |||
| 25 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index eabfdea5d..d4841fdb7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -27,7 +27,6 @@ | |||
| 27 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 27 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 28 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | 28 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |
| 29 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 29 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 30 | #include "video_core/renderer_opengl/utils.h" | ||
| 31 | #include "video_core/shader/memory_util.h" | 30 | #include "video_core/shader/memory_util.h" |
| 32 | #include "video_core/shader/registry.h" | 31 | #include "video_core/shader/registry.h" |
| 33 | #include "video_core/shader/shader_ir.h" | 32 | #include "video_core/shader/shader_ir.h" |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ccbdfe967..2e1fa252d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode; | |||
| 38 | using Tegra::Shader::PixelImap; | 38 | using Tegra::Shader::PixelImap; |
| 39 | using Tegra::Shader::Register; | 39 | using Tegra::Shader::Register; |
| 40 | using Tegra::Shader::TextureType; | 40 | using Tegra::Shader::TextureType; |
| 41 | using VideoCommon::Shader::BuildTransformFeedback; | ||
| 42 | using VideoCommon::Shader::Registry; | ||
| 43 | 41 | ||
| 44 | using namespace std::string_literals; | ||
| 45 | using namespace VideoCommon::Shader; | 42 | using namespace VideoCommon::Shader; |
| 43 | using namespace std::string_literals; | ||
| 46 | 44 | ||
| 47 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 45 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 48 | using Operation = const OperationNode&; | 46 | using Operation = const OperationNode&; |
| @@ -2753,11 +2751,11 @@ private: | |||
| 2753 | } | 2751 | } |
| 2754 | } | 2752 | } |
| 2755 | 2753 | ||
| 2756 | std::string GetSampler(const Sampler& sampler) const { | 2754 | std::string GetSampler(const SamplerEntry& sampler) const { |
| 2757 | return AppendSuffix(sampler.index, "sampler"); | 2755 | return AppendSuffix(sampler.index, "sampler"); |
| 2758 | } | 2756 | } |
| 2759 | 2757 | ||
| 2760 | std::string GetImage(const Image& image) const { | 2758 | std::string GetImage(const ImageEntry& image) const { |
| 2761 | return AppendSuffix(image.index, "image"); | 2759 | return AppendSuffix(image.index, "image"); |
| 2762 | } | 2760 | } |
| 2763 | 2761 | ||
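GetSampler and GetImage keep deriving GLSL identifiers from the entry index after the SamplerEntry/ImageEntry rename. For orientation, an AppendSuffix-style helper can be as small as the following; the exact output format ("sampler5", "image2", ...) is an assumption, not something this hunk shows:

    // Hypothetical helper: joins base name and index, e.g. (5, "sampler") -> "sampler5".
    std::string AppendSuffix(u32 index, std::string_view name) {
        return fmt::format("{}{}", name, index);
    }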
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index c4ff47875..be68994bb 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -20,8 +20,8 @@ namespace OpenGL { | |||
| 20 | class Device; | 20 | class Device; |
| 21 | 21 | ||
| 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 23 | using SamplerEntry = VideoCommon::Shader::Sampler; | 23 | using SamplerEntry = VideoCommon::Shader::SamplerEntry; |
| 24 | using ImageEntry = VideoCommon::Shader::Image; | 24 | using ImageEntry = VideoCommon::Shader::ImageEntry; |
| 25 | 25 | ||
| 26 | class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { | 26 | class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { |
| 27 | public: | 27 | public: |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 691c6c79b..553e6e8d6 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() { | |||
| 83 | } | 83 | } |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | void ProgramManager::BindHostCompute(GLuint program) { | ||
| 87 | if (use_assembly_programs) { | ||
| 88 | glDisable(GL_COMPUTE_PROGRAM_NV); | ||
| 89 | } | ||
| 90 | glUseProgram(program); | ||
| 91 | is_graphics_bound = false; | ||
| 92 | } | ||
| 93 | |||
| 94 | void ProgramManager::RestoreGuestCompute() { | ||
| 95 | if (use_assembly_programs) { | ||
| 96 | glEnable(GL_COMPUTE_PROGRAM_NV); | ||
| 97 | glUseProgram(0); | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 86 | void ProgramManager::UseVertexShader(GLuint program) { | 101 | void ProgramManager::UseVertexShader(GLuint program) { |
| 87 | if (use_assembly_programs) { | 102 | if (use_assembly_programs) { |
| 88 | BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); | 103 | BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); |
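BindHostCompute mirrors BindHostPipeline for the compute stage: while assembly programs (the NV_gpu_program family) are active, GL_COMPUTE_PROGRAM_NV has to be disabled before a GLSL program object can own a dispatch, and RestoreGuestCompute re-enables it afterwards. A sketch of a caller wrapping an internal dispatch; the dispatch parameters and barrier are illustrative, not part of this change:

    // Hypothetical caller: run a host GLSL compute shader without clobbering
    // the guest's assembly-program bindings.
    void RunHostCompute(ProgramManager& program_manager, GLuint program) {
        program_manager.BindHostCompute(program); // glUseProgram, NV compute off
        glDispatchCompute(1, 1, 1);
        glMemoryBarrier(GL_ALL_BARRIER_BITS);     // pick the bits the shader needs
        program_manager.RestoreGuestCompute();    // NV compute back on, GLSL unbound
    }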
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 950e0dfcb..ad42cce74 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -45,6 +45,12 @@ public: | |||
| 45 | /// Rewinds BindHostPipeline state changes. | 45 | /// Rewinds BindHostPipeline state changes. |
| 46 | void RestoreGuestPipeline(); | 46 | void RestoreGuestPipeline(); |
| 47 | 47 | ||
| 48 | /// Binds an OpenGL GLSL program object unsynchronized with the guest state. | ||
| 49 | void BindHostCompute(GLuint program); | ||
| 50 | |||
| 51 | /// Rewinds BindHostCompute state changes. | ||
| 52 | void RestoreGuestCompute(); | ||
| 53 | |||
| 48 | void UseVertexShader(GLuint program); | 54 | void UseVertexShader(GLuint program); |
| 49 | void UseGeometryShader(GLuint program); | 55 | void UseGeometryShader(GLuint program); |
| 50 | void UseFragmentShader(GLuint program); | 56 | void UseFragmentShader(GLuint program); |
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 45f4fc565..60e6fa39f 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp | |||
| @@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} | |||
| 249 | } | 249 | } |
| 250 | } | 250 | } |
| 251 | 251 | ||
| 252 | void StateTracker::InvalidateStreamBuffer() { | ||
| 253 | flags[Dirty::VertexBuffers] = true; | ||
| 254 | for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { | ||
| 255 | flags[index] = true; | ||
| 256 | } | ||
| 257 | } | ||
| 258 | |||
| 252 | } // namespace OpenGL | 259 | } // namespace OpenGL |
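InvalidateStreamBuffer raises the aggregate VertexBuffers flag together with all 32 per-binding flags, so the next draw rebinds every vertex buffer range that pointed into the orphaned stream buffer. The consuming side is not part of this diff; a sketch of the usual test-and-clear loop it implies:

    // Hypothetical consumer of the flags raised by InvalidateStreamBuffer.
    void SyncVertexBuffers(Tegra::Engines::Maxwell3D::DirtyState::Flags& flags) {
        if (!flags[Dirty::VertexBuffers]) {
            return; // nothing to rebind since the last draw
        }
        flags[Dirty::VertexBuffers] = false;
        for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
            if (!flags[index]) {
                continue;
            }
            flags[index] = false;
            // ... rebind vertex buffer binding (index - Dirty::VertexBuffer0) ...
        }
    }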
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 9d127548f..574615d3c 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h | |||
| @@ -92,6 +92,8 @@ class StateTracker { | |||
| 92 | public: | 92 | public: |
| 93 | explicit StateTracker(Tegra::GPU& gpu); | 93 | explicit StateTracker(Tegra::GPU& gpu); |
| 94 | 94 | ||
| 95 | void InvalidateStreamBuffer(); | ||
| 96 | |||
| 95 | void BindIndexBuffer(GLuint new_index_buffer) { | 97 | void BindIndexBuffer(GLuint new_index_buffer) { |
| 96 | if (index_buffer == new_index_buffer) { | 98 | if (index_buffer == new_index_buffer) { |
| 97 | return; | 99 | return; |
| @@ -100,6 +102,14 @@ public: | |||
| 100 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer); | 102 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer); |
| 101 | } | 103 | } |
| 102 | 104 | ||
| 105 | void BindFramebuffer(GLuint new_framebuffer) { | ||
| 106 | if (framebuffer == new_framebuffer) { | ||
| 107 | return; | ||
| 108 | } | ||
| 109 | framebuffer = new_framebuffer; | ||
| 110 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); | ||
| 111 | } | ||
| 112 | |||
| 103 | void NotifyScreenDrawVertexArray() { | 113 | void NotifyScreenDrawVertexArray() { |
| 104 | flags[OpenGL::Dirty::VertexFormats] = true; | 114 | flags[OpenGL::Dirty::VertexFormats] = true; |
| 105 | flags[OpenGL::Dirty::VertexFormat0 + 0] = true; | 115 | flags[OpenGL::Dirty::VertexFormat0 + 0] = true; |
| @@ -129,9 +139,9 @@ public: | |||
| 129 | flags[OpenGL::Dirty::Scissor0] = true; | 139 | flags[OpenGL::Dirty::Scissor0] = true; |
| 130 | } | 140 | } |
| 131 | 141 | ||
| 132 | void NotifyColorMask0() { | 142 | void NotifyColorMask(size_t index) { |
| 133 | flags[OpenGL::Dirty::ColorMasks] = true; | 143 | flags[OpenGL::Dirty::ColorMasks] = true; |
| 134 | flags[OpenGL::Dirty::ColorMask0] = true; | 144 | flags[OpenGL::Dirty::ColorMask0 + index] = true; |
| 135 | } | 145 | } |
| 136 | 146 | ||
| 137 | void NotifyBlend0() { | 147 | void NotifyBlend0() { |
| @@ -190,6 +200,7 @@ public: | |||
| 190 | private: | 200 | private: |
| 191 | Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; | 201 | Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; |
| 192 | 202 | ||
| 203 | GLuint framebuffer = 0; | ||
| 193 | GLuint index_buffer = 0; | 204 | GLuint index_buffer = 0; |
| 194 | }; | 205 | }; |
| 195 | 206 | ||
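BindFramebuffer extends the redundant-state filter already applied to the index buffer: the tracker caches the last GL_DRAW_FRAMEBUFFER handle and skips glBindFramebuffer when it is unchanged, while NotifyColorMask now takes the render-target index so any of the per-target ColorMask bits can be dirtied, not only slot 0. Back-to-back binds of the same handle collapse to a single GL call:

    // fbo_handle is illustrative; only the first call reaches the driver.
    state_tracker.BindFramebuffer(fbo_handle);
    state_tracker.BindFramebuffer(fbo_handle); // filtered, no glBindFramebuffer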
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 887995cf4..e0819cdf2 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/microprofile.h" | 10 | #include "common/microprofile.h" |
| 11 | #include "video_core/renderer_opengl/gl_device.h" | 11 | #include "video_core/renderer_opengl/gl_device.h" |
| 12 | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 13 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 13 | 14 | ||
| 14 | MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", | 15 | MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", |
| @@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", | |||
| 16 | 17 | ||
| 17 | namespace OpenGL { | 18 | namespace OpenGL { |
| 18 | 19 | ||
| 19 | OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) | 20 | OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_) |
| 20 | : buffer_size(size) { | 21 | : state_tracker{state_tracker_} { |
| 21 | gl_buffer.Create(); | 22 | gl_buffer.Create(); |
| 22 | 23 | ||
| 23 | GLsizeiptr allocate_size = size; | ||
| 24 | if (vertex_data_usage) { | ||
| 25 | // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer | ||
| 26 | // read position is near the end and is an out-of-bound access to the vertex buffer. This is | ||
| 27 | // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the | ||
| 28 | // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the | ||
| 29 | // crash. | ||
| 30 | allocate_size *= 2; | ||
| 31 | } | ||
| 32 | |||
| 33 | static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; | 24 | static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; |
| 34 | glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); | 25 | glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags); |
| 35 | mapped_ptr = static_cast<u8*>( | 26 | mapped_ptr = static_cast<u8*>( |
| 36 | glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); | 27 | glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); |
| 37 | 28 | ||
| 38 | if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { | 29 | if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { |
| 39 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); | 30 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); |
| @@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() { | |||
| 46 | gl_buffer.Release(); | 37 | gl_buffer.Release(); |
| 47 | } | 38 | } |
| 48 | 39 | ||
| 49 | std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { | 40 | std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { |
| 50 | ASSERT(size <= buffer_size); | 41 | ASSERT(size <= BUFFER_SIZE); |
| 51 | ASSERT(alignment <= buffer_size); | 42 | ASSERT(alignment <= BUFFER_SIZE); |
| 52 | mapped_size = size; | 43 | mapped_size = size; |
| 53 | 44 | ||
| 54 | if (alignment > 0) { | 45 | if (alignment > 0) { |
| 55 | buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment); | 46 | buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment); |
| 56 | } | 47 | } |
| 57 | 48 | ||
| 58 | bool invalidate = false; | 49 | if (buffer_pos + size > BUFFER_SIZE) { |
| 59 | if (buffer_pos + size > buffer_size) { | ||
| 60 | MICROPROFILE_SCOPE(OpenGL_StreamBuffer); | 50 | MICROPROFILE_SCOPE(OpenGL_StreamBuffer); |
| 61 | glInvalidateBufferData(gl_buffer.handle); | 51 | glInvalidateBufferData(gl_buffer.handle); |
| 52 | state_tracker.InvalidateStreamBuffer(); | ||
| 62 | 53 | ||
| 63 | buffer_pos = 0; | 54 | buffer_pos = 0; |
| 64 | invalidate = true; | ||
| 65 | } | 55 | } |
| 66 | 56 | ||
| 67 | return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); | 57 | return std::make_pair(mapped_ptr + buffer_pos, buffer_pos); |
| 68 | } | 58 | } |
| 69 | 59 | ||
| 70 | void OGLStreamBuffer::Unmap(GLsizeiptr size) { | 60 | void OGLStreamBuffer::Unmap(GLsizeiptr size) { |
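With the invalidation flag gone from Map's return value, an upload through the ring reduces to: align, copy into the persistent mapping, flush via Unmap, and use the returned offset when binding. A sketch under those assumptions (the helper and its call sites are illustrative):

    #include <cstring>

    // Hypothetical upload through the pair-returning Map API.
    void UploadData(OGLStreamBuffer& stream_buffer, const void* data, size_t size) {
        const auto [ptr, offset] = stream_buffer.Map(static_cast<GLsizeiptr>(size), 4);
        std::memcpy(ptr, data, size);                       // write into the mapped ring
        stream_buffer.Unmap(static_cast<GLsizeiptr>(size)); // flushes the written range
        // `offset` is what later feeds glBindVertexBuffer/glBindBufferRange.
    }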
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 307a67113..dd9cf67eb 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h | |||
| @@ -4,29 +4,31 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <tuple> | 7 | #include <utility> |
| 8 | |||
| 8 | #include <glad/glad.h> | 9 | #include <glad/glad.h> |
| 10 | |||
| 9 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 10 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 11 | 13 | ||
| 12 | namespace OpenGL { | 14 | namespace OpenGL { |
| 13 | 15 | ||
| 14 | class Device; | 16 | class Device; |
| 17 | class StateTracker; | ||
| 15 | 18 | ||
| 16 | class OGLStreamBuffer : private NonCopyable { | 19 | class OGLStreamBuffer : private NonCopyable { |
| 17 | public: | 20 | public: |
| 18 | explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); | 21 | explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_); |
| 19 | ~OGLStreamBuffer(); | 22 | ~OGLStreamBuffer(); |
| 20 | 23 | ||
| 21 | /* | 24 | /* |
| 22 | * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes | 25 | * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes |
| 23 | * and the optional alignment requirement. | 26 | * and the optional alignment requirement. |
| 24 | * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. | 27 | * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. |
| 25 | * The return values are the pointer to the new chunk, the offset within the buffer, | 28 | * The return values are the pointer to the new chunk, and the offset within the buffer. |
| 26 | * and the invalidation flag for previous chunks. | ||
| 27 | * The actual used size must be specified on unmapping the chunk. | 29 | * The actual used size must be specified on unmapping the chunk. |
| 28 | */ | 30 | */ |
| 29 | std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0); | 31 | std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0); |
| 30 | 32 | ||
| 31 | void Unmap(GLsizeiptr size); | 33 | void Unmap(GLsizeiptr size); |
| 32 | 34 | ||
| @@ -39,15 +41,18 @@ public: | |||
| 39 | } | 41 | } |
| 40 | 42 | ||
| 41 | GLsizeiptr Size() const noexcept { | 43 | GLsizeiptr Size() const noexcept { |
| 42 | return buffer_size; | 44 | return BUFFER_SIZE; |
| 43 | } | 45 | } |
| 44 | 46 | ||
| 45 | private: | 47 | private: |
| 48 | static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024; | ||
| 49 | |||
| 50 | StateTracker& state_tracker; | ||
| 51 | |||
| 46 | OGLBuffer gl_buffer; | 52 | OGLBuffer gl_buffer; |
| 47 | 53 | ||
| 48 | GLuint64EXT gpu_address = 0; | 54 | GLuint64EXT gpu_address = 0; |
| 49 | GLintptr buffer_pos = 0; | 55 | GLintptr buffer_pos = 0; |
| 50 | GLsizeiptr buffer_size = 0; | ||
| 51 | GLsizeiptr mapped_size = 0; | 56 | GLsizeiptr mapped_size = 0; |
| 52 | u8* mapped_ptr = nullptr; | 57 | u8* mapped_ptr = nullptr; |
| 53 | }; | 58 | }; |
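The rewritten constructor pins the ring at 256 MiB of immutable storage with a persistent, explicitly flushed mapping. Condensed to its GL essentials, the pattern is:

    // Persistent-mapping setup, condensed from the constructor in the .cpp above.
    GLuint buffer = 0;
    glCreateBuffers(1, &buffer);
    constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
    constexpr GLbitfield MAP_FLAGS = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
    glNamedBufferStorage(buffer, BUFFER_SIZE, nullptr, MAP_FLAGS); // immutable storage
    u8* mapped_ptr = static_cast<u8*>(glMapNamedBufferRange(
        buffer, 0, BUFFER_SIZE, MAP_FLAGS | GL_MAP_FLUSH_EXPLICIT_BIT));
    // Writes land in mapped_ptr; each written range must be flushed explicitly:
    glFlushMappedNamedBufferRange(buffer, /*offset=*/0, /*length=*/4096); // example range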
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index daf352b50..4c690418c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -2,173 +2,238 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include <algorithm> |
| 6 | #include "common/bit_util.h" | 6 | #include <array> |
| 7 | #include "common/common_types.h" | 7 | #include <bit> |
| 8 | #include "common/microprofile.h" | 8 | #include <string> |
| 9 | #include "common/scope_exit.h" | 9 | |
| 10 | #include "core/core.h" | 10 | #include <glad/glad.h> |
| 11 | #include "video_core/morton.h" | 11 | |
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_device.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 14 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 14 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 15 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 15 | #include "video_core/renderer_opengl/utils.h" | 16 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 16 | #include "video_core/texture_cache/surface_base.h" | 17 | #include "video_core/renderer_opengl/util_shaders.h" |
| 18 | #include "video_core/surface.h" | ||
| 19 | #include "video_core/texture_cache/format_lookup_table.h" | ||
| 20 | #include "video_core/texture_cache/samples_helper.h" | ||
| 17 | #include "video_core/texture_cache/texture_cache.h" | 21 | #include "video_core/texture_cache/texture_cache.h" |
| 18 | #include "video_core/textures/convert.h" | 22 | #include "video_core/textures/decoders.h" |
| 19 | #include "video_core/textures/texture.h" | ||
| 20 | 23 | ||
| 21 | namespace OpenGL { | 24 | namespace OpenGL { |
| 22 | 25 | ||
| 23 | using Tegra::Texture::SwizzleSource; | 26 | namespace { |
| 24 | using VideoCore::MortonSwizzleMode; | ||
| 25 | 27 | ||
| 28 | using Tegra::Texture::SwizzleSource; | ||
| 29 | using Tegra::Texture::TextureMipmapFilter; | ||
| 30 | using Tegra::Texture::TextureType; | ||
| 31 | using Tegra::Texture::TICEntry; | ||
| 32 | using Tegra::Texture::TSCEntry; | ||
| 33 | using VideoCommon::CalculateLevelStrideAlignment; | ||
| 34 | using VideoCommon::ImageCopy; | ||
| 35 | using VideoCommon::ImageFlagBits; | ||
| 36 | using VideoCommon::ImageType; | ||
| 37 | using VideoCommon::NUM_RT; | ||
| 38 | using VideoCommon::SamplesLog2; | ||
| 39 | using VideoCommon::SwizzleParameters; | ||
| 40 | using VideoCore::Surface::BytesPerBlock; | ||
| 41 | using VideoCore::Surface::IsPixelFormatASTC; | ||
| 42 | using VideoCore::Surface::IsPixelFormatSRGB; | ||
| 43 | using VideoCore::Surface::MaxPixelFormat; | ||
| 26 | using VideoCore::Surface::PixelFormat; | 44 | using VideoCore::Surface::PixelFormat; |
| 27 | using VideoCore::Surface::SurfaceTarget; | ||
| 28 | using VideoCore::Surface::SurfaceType; | 45 | using VideoCore::Surface::SurfaceType; |
| 29 | 46 | ||
| 30 | MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); | 47 | struct CopyOrigin { |
| 31 | MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); | 48 | GLint level; |
| 32 | MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", | 49 | GLint x; |
| 33 | MP_RGB(128, 192, 128)); | 50 | GLint y; |
| 51 | GLint z; | ||
| 52 | }; | ||
| 34 | 53 | ||
| 35 | namespace { | 54 | struct CopyRegion { |
| 55 | GLsizei width; | ||
| 56 | GLsizei height; | ||
| 57 | GLsizei depth; | ||
| 58 | }; | ||
| 36 | 59 | ||
| 37 | struct FormatTuple { | 60 | struct FormatTuple { |
| 38 | GLenum internal_format; | 61 | GLenum internal_format; |
| 39 | GLenum format = GL_NONE; | 62 | GLenum format = GL_NONE; |
| 40 | GLenum type = GL_NONE; | 63 | GLenum type = GL_NONE; |
| 64 | GLenum store_format = internal_format; | ||
| 41 | }; | 65 | }; |
| 42 | 66 | ||
| 43 | constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ | 67 | constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ |
| 44 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM | 68 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM |
| 45 | {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM | 69 | {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM |
| 46 | {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT | 70 | {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT |
| 47 | {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT | 71 | {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT |
| 48 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM | 72 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM |
| 49 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM | 73 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM |
| 50 | {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM | 74 | {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM |
| 51 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM | 75 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM |
| 52 | {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT | 76 | {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT |
| 53 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM | 77 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM |
| 54 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM | 78 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM |
| 55 | {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM | 79 | {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM |
| 56 | {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT | 80 | {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT |
| 57 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT | 81 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT |
| 58 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT | 82 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT |
| 59 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM | 83 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM |
| 60 | {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM | 84 | {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM |
| 61 | {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT | 85 | {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT |
| 62 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT | 86 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT |
| 63 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT | 87 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT |
| 64 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT | 88 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT |
| 65 | {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM | 89 | {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM |
| 66 | {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM | 90 | {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM |
| 67 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM | 91 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM |
| 68 | {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM | 92 | {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM |
| 69 | {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM | 93 | {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM |
| 70 | {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM | 94 | {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM |
| 71 | {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM | 95 | {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM |
| 72 | {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM | 96 | {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM |
| 73 | {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT | 97 | {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT |
| 74 | {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT | 98 | {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT |
| 75 | {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM | 99 | {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM |
| 76 | {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM | 100 | {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM |
| 77 | {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT | 101 | {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT |
| 78 | {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT | 102 | {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT |
| 79 | {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT | 103 | {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT |
| 80 | {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT | 104 | {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT |
| 81 | {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT | 105 | {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT |
| 82 | {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT | 106 | {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT |
| 83 | {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM | 107 | {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM |
| 84 | {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM | 108 | {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM |
| 85 | {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT | 109 | {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT |
| 86 | {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT | 110 | {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT |
| 87 | {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM | 111 | {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM |
| 88 | {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT | 112 | {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT |
| 89 | {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT | 113 | {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT |
| 90 | {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT | 114 | {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT |
| 91 | {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM | 115 | {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM |
| 92 | {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT | 116 | {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT |
| 93 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB | 117 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_RGBA8}, // A8B8G8R8_SRGB |
| 94 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM | 118 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM |
| 95 | {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM | 119 | {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM |
| 96 | {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT | 120 | {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT |
| 97 | {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT | 121 | {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT |
| 98 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT | 122 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT |
| 99 | {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT | 123 | {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT |
| 100 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT | 124 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT |
| 101 | {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT | 125 | {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT |
| 102 | {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM | 126 | {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM |
| 103 | {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM | 127 | {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM |
| 104 | {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM | 128 | {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM |
| 105 | {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB | 129 | {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, GL_RGBA8}, // B8G8R8A8_SRGB
| 106 | // Compressed sRGB formats | 130 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB |
| 107 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB | 131 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB |
| 108 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB | 132 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB |
| 109 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB | 133 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB |
| 110 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB | 134 | {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM |
| 111 | {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM | 135 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB |
| 112 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB | 136 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB |
| 113 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB | 137 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB |
| 114 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB | 138 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB |
| 115 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB | 139 | {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM |
| 116 | {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM | 140 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB |
| 117 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB | 141 | {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM |
| 118 | {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM | 142 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB |
| 119 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB | 143 | {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM |
| 120 | {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM | 144 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB |
| 121 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB | 145 | {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM |
| 122 | {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM | 146 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB |
| 123 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB | 147 | {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM |
| 124 | {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM | 148 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB |
| 125 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB | 149 | {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM |
| 126 | {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM | 150 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB |
| 127 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB | 151 | {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM |
| 128 | {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM | 152 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB |
| 129 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB | 153 | {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT |
| 130 | {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT | 154 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT |
| 131 | 155 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM | |
| 132 | // Depth formats | 156 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT |
| 133 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT | 157 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM |
| 134 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM | ||
| 135 | |||
| 136 | // DepthStencil formats | ||
| 137 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT | ||
| 138 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM | ||
| 139 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, | 158 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, |
| 140 | GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT | 159 | GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT |
| 141 | }}; | 160 | }}; |
| 142 | 161 | ||
| 162 | constexpr std::array ACCELERATED_FORMATS{ | ||
| 163 | GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, | ||
| 164 | GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, | ||
| 165 | GL_RG16UI, GL_RG8UI, GL_R32UI, GL_R16UI, GL_R8UI, GL_RGBA32I, | ||
| 166 | GL_RGBA16I, GL_RGBA8I, GL_RG32I, GL_RG16I, GL_RG8I, GL_R32I, | ||
| 167 | GL_R16I, GL_R8I, GL_RGBA16, GL_RGB10_A2, GL_RGBA8, GL_RG16, | ||
| 168 | GL_RG8, GL_R16, GL_R8, GL_RGBA16_SNORM, GL_RGBA8_SNORM, GL_RG16_SNORM, | ||
| 169 | GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, | ||
| 170 | }; | ||
| 171 | |||
| 143 | const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { | 172 | const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { |
| 144 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); | 173 | ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size()); |
| 145 | return tex_format_tuples[static_cast<std::size_t>(pixel_format)]; | 174 | return FORMAT_TABLE[static_cast<size_t>(pixel_format)]; |
| 146 | } | 175 | } |
| 147 | 176 | ||
| 148 | GLenum GetTextureTarget(const SurfaceTarget& target) { | 177 | GLenum ImageTarget(const VideoCommon::ImageInfo& info) { |
| 149 | switch (target) { | 178 | switch (info.type) { |
| 150 | case SurfaceTarget::TextureBuffer: | 179 | case ImageType::e1D: |
| 180 | return GL_TEXTURE_1D_ARRAY; | ||
| 181 | case ImageType::e2D: | ||
| 182 | if (info.num_samples > 1) { | ||
| 183 | return GL_TEXTURE_2D_MULTISAMPLE_ARRAY; | ||
| 184 | } | ||
| 185 | return GL_TEXTURE_2D_ARRAY; | ||
| 186 | case ImageType::e3D: | ||
| 187 | return GL_TEXTURE_3D; | ||
| 188 | case ImageType::Linear: | ||
| 189 | return GL_TEXTURE_2D_ARRAY; | ||
| 190 | case ImageType::Buffer: | ||
| 151 | return GL_TEXTURE_BUFFER; | 191 | return GL_TEXTURE_BUFFER; |
| 152 | case SurfaceTarget::Texture1D: | 192 | } |
| 193 | UNREACHABLE_MSG("Invalid image type={}", info.type); | ||
| 194 | return GL_NONE; | ||
| 195 | } | ||
| 196 | |||
| 197 | GLenum ImageTarget(ImageViewType type, int num_samples = 1) { | ||
| 198 | const bool is_multisampled = num_samples > 1; | ||
| 199 | switch (type) { | ||
| 200 | case ImageViewType::e1D: | ||
| 153 | return GL_TEXTURE_1D; | 201 | return GL_TEXTURE_1D; |
| 154 | case SurfaceTarget::Texture2D: | 202 | case ImageViewType::e2D: |
| 155 | return GL_TEXTURE_2D; | 203 | return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; |
| 156 | case SurfaceTarget::Texture3D: | 204 | case ImageViewType::Cube: |
| 205 | return GL_TEXTURE_CUBE_MAP; | ||
| 206 | case ImageViewType::e3D: | ||
| 157 | return GL_TEXTURE_3D; | 207 | return GL_TEXTURE_3D; |
| 158 | case SurfaceTarget::Texture1DArray: | 208 | case ImageViewType::e1DArray: |
| 159 | return GL_TEXTURE_1D_ARRAY; | 209 | return GL_TEXTURE_1D_ARRAY; |
| 160 | case SurfaceTarget::Texture2DArray: | 210 | case ImageViewType::e2DArray: |
| 161 | return GL_TEXTURE_2D_ARRAY; | 211 | return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; |
| 162 | case SurfaceTarget::TextureCubemap: | 212 | case ImageViewType::CubeArray: |
| 163 | return GL_TEXTURE_CUBE_MAP; | ||
| 164 | case SurfaceTarget::TextureCubeArray: | ||
| 165 | return GL_TEXTURE_CUBE_MAP_ARRAY; | 213 | return GL_TEXTURE_CUBE_MAP_ARRAY; |
| 214 | case ImageViewType::Rect: | ||
| 215 | return GL_TEXTURE_RECTANGLE; | ||
| 216 | case ImageViewType::Buffer: | ||
| 217 | return GL_TEXTURE_BUFFER; | ||
| 166 | } | 218 | } |
| 167 | UNREACHABLE(); | 219 | UNREACHABLE_MSG("Invalid image view type={}", type); |
| 168 | return {}; | 220 | return GL_NONE; |
| 169 | } | 221 | } |
| 170 | 222 | ||
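Note the flattening in ImageTarget(info): guest 1D images land on GL_TEXTURE_1D_ARRAY and non-multisampled 2D images, including linear/pitch ones, on GL_TEXTURE_2D_ARRAY, so layered and unlayered variants share one host target per dimensionality. A sketch of an allocation consuming it; the extent and level variables stand in for fields of VideoCommon::ImageInfo and are assumptions here:

    // Hypothetical allocation: derive the host target from the guest image info.
    OGLTexture texture;
    const GLenum target = ImageTarget(info);
    texture.Create(target);
    if (target == GL_TEXTURE_3D) {
        glTextureStorage3D(texture.handle, num_levels, internal_format,
                           width, height, depth); // values taken from `info`
    }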
| 171 | GLint GetSwizzleSource(SwizzleSource source) { | 223 | GLenum TextureMode(PixelFormat format, bool is_first) { |
| 224 | switch (format) { | ||
| 225 | case PixelFormat::D24_UNORM_S8_UINT: | ||
| 226 | case PixelFormat::D32_FLOAT_S8_UINT: | ||
| 227 | return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; | ||
| 228 | case PixelFormat::S8_UINT_D24_UNORM: | ||
| 229 | return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; | ||
| 230 | default: | ||
| 231 | UNREACHABLE(); | ||
| 232 | return GL_DEPTH_COMPONENT; | ||
| 233 | } | ||
| 234 | } | ||
| 235 | |||
| 236 | GLint Swizzle(SwizzleSource source) { | ||
| 172 | switch (source) { | 237 | switch (source) { |
| 173 | case SwizzleSource::Zero: | 238 | case SwizzleSource::Zero: |
| 174 | return GL_ZERO; | 239 | return GL_ZERO; |
| @@ -184,530 +249,813 @@ GLint GetSwizzleSource(SwizzleSource source) { | |||
| 184 | case SwizzleSource::OneFloat: | 249 | case SwizzleSource::OneFloat: |
| 185 | return GL_ONE; | 250 | return GL_ONE; |
| 186 | } | 251 | } |
| 187 | UNREACHABLE(); | 252 | UNREACHABLE_MSG("Invalid swizzle source={}", source); |
| 188 | return GL_NONE; | 253 | return GL_NONE; |
| 189 | } | 254 | } |
| 190 | 255 | ||
| 191 | GLenum GetComponent(PixelFormat format, bool is_first) { | 256 | GLenum AttachmentType(PixelFormat format) { |
| 192 | switch (format) { | 257 | switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) { |
| 193 | case PixelFormat::D24_UNORM_S8_UINT: | 258 | case SurfaceType::Depth: |
| 194 | case PixelFormat::D32_FLOAT_S8_UINT: | 259 | return GL_DEPTH_ATTACHMENT; |
| 195 | return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; | 260 | case SurfaceType::DepthStencil: |
| 196 | case PixelFormat::S8_UINT_D24_UNORM: | 261 | return GL_DEPTH_STENCIL_ATTACHMENT; |
| 197 | return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; | ||
| 198 | default: | 262 | default: |
| 199 | UNREACHABLE(); | 263 | UNIMPLEMENTED_MSG("Unimplemented type={}", type); |
| 200 | return GL_DEPTH_COMPONENT; | 264 | return GL_NONE; |
| 201 | } | 265 | } |
| 202 | } | 266 | } |
| 203 | 267 | ||
| 204 | void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { | 268 | [[nodiscard]] bool IsConverted(const Device& device, PixelFormat format, ImageType type) { |
| 205 | if (params.IsBuffer()) { | 269 | if (!device.HasASTC() && IsPixelFormatASTC(format)) { |
| 206 | return; | 270 | return true; |
| 207 | } | 271 | } |
| 208 | glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | 272 | switch (format) { |
| 209 | glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | 273 | case PixelFormat::BC4_UNORM: |
| 210 | glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); | 274 | case PixelFormat::BC5_UNORM: |
| 211 | glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); | 275 | return type == ImageType::e3D; |
| 212 | glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1)); | 276 | default: |
| 213 | if (params.num_levels == 1) { | 277 | break; |
| 214 | glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); | ||
| 215 | } | 278 | } |
| 279 | return false; | ||
| 216 | } | 280 | } |
| 217 | 281 | ||
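IsConverted marks formats the host cannot consume directly: any ASTC format when the driver lacks native ASTC support, plus BC4/BC5 used as 3D images. Such data is decoded in software and stored in a plain format; the old CachedSurface constructor further down applies exactly this fallback. A sketch of the selection (the helper name is invented for illustration):

    // Sketch: pick the host internal format, falling back to (s)RGBA8 when the
    // image has to be converted on the CPU first.
    GLenum SelectInternalFormat(const Device& device, const VideoCommon::ImageInfo& info) {
        if (IsConverted(device, info.format, info.type)) {
            return IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
        }
        return GetFormatTuple(info.format).internal_format;
    }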
| 218 | OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format, | 282 | [[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) { |
| 219 | OGLBuffer& texture_buffer) { | 283 | switch (value) { |
| 220 | OGLTexture texture; | 284 | case SwizzleSource::G: |
| 221 | texture.Create(target); | 285 | return SwizzleSource::R; |
| 286 | default: | ||
| 287 | return value; | ||
| 288 | } | ||
| 289 | } | ||
| 222 | 290 | ||
| 223 | switch (params.target) { | 291 | void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4> swizzle) { |
| 224 | case SurfaceTarget::Texture1D: | 292 | switch (format) { |
| 225 | glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); | 293 | case PixelFormat::D24_UNORM_S8_UINT: |
| 226 | break; | 294 | case PixelFormat::D32_FLOAT_S8_UINT: |
| 227 | case SurfaceTarget::TextureBuffer: | 295 | case PixelFormat::S8_UINT_D24_UNORM: |
| 228 | texture_buffer.Create(); | 296 | UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G); |
| 229 | glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), | 297 | glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, |
| 230 | nullptr, GL_DYNAMIC_STORAGE_BIT); | 298 | TextureMode(format, swizzle[0] == SwizzleSource::R)); |
| 231 | glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); | 299 | std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); |
| 232 | break; | 300 | break; |
| 233 | case SurfaceTarget::Texture2D: | 301 | default: |
| 234 | case SurfaceTarget::TextureCubemap: | ||
| 235 | glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, | ||
| 236 | params.height); | ||
| 237 | break; | 302 | break; |
| 238 | case SurfaceTarget::Texture3D: | 303 | } |
| 239 | case SurfaceTarget::Texture2DArray: | 304 | std::array<GLint, 4> gl_swizzle; |
| 240 | case SurfaceTarget::TextureCubeArray: | 305 | std::ranges::transform(swizzle, gl_swizzle.begin(), Swizzle); |
| 241 | glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width, | 306 | glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); |
| 242 | params.height, params.depth); | 307 | } |
| 308 | |||
| 309 | [[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, | ||
| 310 | const VideoCommon::ImageInfo& info) { | ||
| 311 | // Disable accelerated uploads for now as they don't implement swizzled uploads | ||
| 312 | return false; | ||
| 313 | switch (info.type) { | ||
| 314 | case ImageType::e2D: | ||
| 315 | case ImageType::e3D: | ||
| 316 | case ImageType::Linear: | ||
| 243 | break; | 317 | break; |
| 244 | default: | 318 | default: |
| 245 | UNREACHABLE(); | 319 | return false; |
| 320 | } | ||
| 321 | const GLenum internal_format = GetFormatTuple(info.format).internal_format; | ||
| 322 | const auto& format_info = runtime.FormatInfo(info.type, internal_format); | ||
| 323 | if (format_info.is_compressed) { | ||
| 324 | return false; | ||
| 325 | } | ||
| 326 | if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) { | ||
| 327 | return false; | ||
| 246 | } | 328 | } |
| 329 | if (format_info.compatibility_by_size) { | ||
| 330 | return true; | ||
| 331 | } | ||
| 332 | const GLenum store_format = StoreFormat(BytesPerBlock(info.format)); | ||
| 333 | const GLenum store_class = runtime.FormatInfo(info.type, store_format).compatibility_class; | ||
| 334 | return format_info.compatibility_class == store_class; | ||
| 335 | } | ||
| 247 | 336 | ||
| 248 | ApplyTextureDefaults(params, texture.handle); | 337 | [[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset, |
| 338 | VideoCommon::SubresourceLayers subresource, GLenum target) { | ||
| 339 | switch (target) { | ||
| 340 | case GL_TEXTURE_2D_ARRAY: | ||
| 341 | case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: | ||
| 342 | return CopyOrigin{ | ||
| 343 | .level = static_cast<GLint>(subresource.base_level), | ||
| 344 | .x = static_cast<GLint>(offset.x), | ||
| 345 | .y = static_cast<GLint>(offset.y), | ||
| 346 | .z = static_cast<GLint>(subresource.base_layer), | ||
| 347 | }; | ||
| 348 | case GL_TEXTURE_3D: | ||
| 349 | return CopyOrigin{ | ||
| 350 | .level = static_cast<GLint>(subresource.base_level), | ||
| 351 | .x = static_cast<GLint>(offset.x), | ||
| 352 | .y = static_cast<GLint>(offset.y), | ||
| 353 | .z = static_cast<GLint>(offset.z), | ||
| 354 | }; | ||
| 355 | default: | ||
| 356 | UNIMPLEMENTED_MSG("Unimplemented copy target={}", target); | ||
| 357 | return CopyOrigin{.level = 0, .x = 0, .y = 0, .z = 0}; | ||
| 358 | } | ||
| 359 | } | ||
| 249 | 360 | ||
| 250 | return texture; | 361 | [[nodiscard]] CopyRegion MakeCopyRegion(VideoCommon::Extent3D extent, |
| 362 | VideoCommon::SubresourceLayers dst_subresource, | ||
| 363 | GLenum target) { | ||
| 364 | switch (target) { | ||
| 365 | case GL_TEXTURE_2D_ARRAY: | ||
| 366 | case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: | ||
| 367 | return CopyRegion{ | ||
| 368 | .width = static_cast<GLsizei>(extent.width), | ||
| 369 | .height = static_cast<GLsizei>(extent.height), | ||
| 370 | .depth = static_cast<GLsizei>(dst_subresource.num_layers), | ||
| 371 | }; | ||
| 372 | case GL_TEXTURE_3D: | ||
| 373 | return CopyRegion{ | ||
| 374 | .width = static_cast<GLsizei>(extent.width), | ||
| 375 | .height = static_cast<GLsizei>(extent.height), | ||
| 376 | .depth = static_cast<GLsizei>(extent.depth), | ||
| 377 | }; | ||
| 378 | default: | ||
| 379 | UNIMPLEMENTED_MSG("Unimplemented copy target={}", target); | ||
| 380 | return CopyRegion{.width = 0, .height = 0, .depth = 0}; | ||
| 381 | } | ||
| 251 | } | 382 | } |
| 252 | 383 | ||
| 253 | constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, | 384 | void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { |
| 254 | SwizzleSource w_source) { | 385 | if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { |
| 255 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | 386 | const GLuint texture = image_view->DefaultHandle(); |
| 256 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | 387 | glNamedFramebufferTexture(fbo, attachment, texture, 0); |
| 388 | return; | ||
| 389 | } | ||
| 390 | const GLuint texture = image_view->Handle(ImageViewType::e3D); | ||
| 391 | if (image_view->range.extent.layers > 1) { | ||
| 392 | // TODO: OpenGL doesn't support rendering to a fixed number of slices | ||
| 393 | glNamedFramebufferTexture(fbo, attachment, texture, 0); | ||
| 394 | } else { | ||
| 395 | const u32 slice = image_view->range.base.layer; | ||
| 396 | glNamedFramebufferTextureLayer(fbo, attachment, texture, 0, slice); | ||
| 397 | } | ||
| 257 | } | 398 | } |
| 258 | 399 | ||
| 259 | } // Anonymous namespace | 400 | } // Anonymous namespace |
| 260 | 401 | ||
| 261 | CachedSurface::CachedSurface(const GPUVAddr gpu_addr_, const SurfaceParams& params_, | 402 | ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_) |
| 262 | bool is_astc_supported_) | 403 | : span(map, size), sync{sync_}, handle{handle_} {} |
| 263 | : SurfaceBase<View>{gpu_addr_, params_, is_astc_supported_} { | ||
| 264 | if (is_converted) { | ||
| 265 | internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8; | ||
| 266 | format = GL_RGBA; | ||
| 267 | type = GL_UNSIGNED_BYTE; | ||
| 268 | } else { | ||
| 269 | const auto& tuple{GetFormatTuple(params.pixel_format)}; | ||
| 270 | internal_format = tuple.internal_format; | ||
| 271 | format = tuple.format; | ||
| 272 | type = tuple.type; | ||
| 273 | is_compressed = params.IsCompressed(); | ||
| 274 | } | ||
| 275 | target = GetTextureTarget(params.target); | ||
| 276 | texture = CreateTexture(params, target, internal_format, texture_buffer); | ||
| 277 | DecorateSurfaceName(); | ||
| 278 | 404 | ||
| 279 | u32 num_layers = 1; | 405 | ImageBufferMap::~ImageBufferMap() { |
| 280 | if (params.is_layered || params.target == SurfaceTarget::Texture3D) { | 406 | if (sync) { |
| 281 | num_layers = params.depth; | 407 | sync->Create(); |
| 282 | } | 408 | } |
| 283 | |||
| 284 | main_view = | ||
| 285 | CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true); | ||
| 286 | } | 409 | } |
| 287 | 410 | ||
| 288 | CachedSurface::~CachedSurface() = default; | 411 | TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, |
| 412 | StateTracker& state_tracker_) | ||
| 413 | : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) { | ||
| 414 | static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; | ||
| 415 | for (size_t i = 0; i < TARGETS.size(); ++i) { | ||
| 416 | const GLenum target = TARGETS[i]; | ||
| 417 | for (const FormatTuple& tuple : FORMAT_TABLE) { | ||
| 418 | const GLenum format = tuple.internal_format; | ||
| 419 | GLint compat_class; | ||
| 420 | GLint compat_type; | ||
| 421 | GLint is_compressed; | ||
| 422 | glGetInternalformativ(target, format, GL_IMAGE_COMPATIBILITY_CLASS, 1, &compat_class); | ||
| 423 | glGetInternalformativ(target, format, GL_IMAGE_FORMAT_COMPATIBILITY_TYPE, 1, | ||
| 424 | &compat_type); | ||
| 425 | glGetInternalformativ(target, format, GL_TEXTURE_COMPRESSED, 1, &is_compressed); | ||
| 426 | const FormatProperties properties{ | ||
| 427 | .compatibility_class = static_cast<GLenum>(compat_class), | ||
| 428 | .compatibility_by_size = compat_type == GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE, | ||
| 429 | .is_compressed = is_compressed == GL_TRUE, | ||
| 430 | }; | ||
| 431 | format_properties[i].emplace(format, properties); | ||
| 432 | } | ||
| 433 | } | ||
| 434 | null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); | ||
| 435 | null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); | ||
| 436 | null_image_3d.Create(GL_TEXTURE_3D); | ||
| 437 | null_image_rect.Create(GL_TEXTURE_RECTANGLE); | ||
| 438 | glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); | ||
| 439 | glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); | ||
| 440 | glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); | ||
| 441 | glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); | ||
| 442 | |||
| 443 | std::array<GLuint, 4> new_handles; | ||
| 444 | glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data()); | ||
| 445 | null_image_view_1d.handle = new_handles[0]; | ||
| 446 | null_image_view_2d.handle = new_handles[1]; | ||
| 447 | null_image_view_2d_array.handle = new_handles[2]; | ||
| 448 | null_image_view_cube.handle = new_handles[3]; | ||
| 449 | glTextureView(null_image_view_1d.handle, GL_TEXTURE_1D, null_image_1d_array.handle, GL_R8, 0, 1, | ||
| 450 | 0, 1); | ||
| 451 | glTextureView(null_image_view_2d.handle, GL_TEXTURE_2D, null_image_cube_array.handle, GL_R8, 0, | ||
| 452 | 1, 0, 1); | ||
| 453 | glTextureView(null_image_view_2d_array.handle, GL_TEXTURE_2D_ARRAY, | ||
| 454 | null_image_cube_array.handle, GL_R8, 0, 1, 0, 1); | ||
| 455 | glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, | ||
| 456 | GL_R8, 0, 1, 0, 6); | ||
| 457 | const std::array texture_handles{ | ||
| 458 | null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, | ||
| 459 | null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, | ||
| 460 | null_image_view_2d_array.handle, null_image_view_cube.handle, | ||
| 461 | }; | ||
| 462 | for (const GLuint handle : texture_handles) { | ||
| 463 | static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; | ||
| 464 | glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); | ||
| 465 | } | ||
| 466 | const auto set_view = [this](ImageViewType type, GLuint handle) { | ||
| 467 | if (device.HasDebuggingToolAttached()) { | ||
| 468 | const std::string name = fmt::format("NullImage {}", type); | ||
| 469 | glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); | ||
| 470 | } | ||
| 471 | null_image_views[static_cast<size_t>(type)] = handle; | ||
| 472 | }; | ||
| 473 | set_view(ImageViewType::e1D, null_image_view_1d.handle); | ||
| 474 | set_view(ImageViewType::e2D, null_image_view_2d.handle); | ||
| 475 | set_view(ImageViewType::Cube, null_image_view_cube.handle); | ||
| 476 | set_view(ImageViewType::e3D, null_image_3d.handle); | ||
| 477 | set_view(ImageViewType::e1DArray, null_image_1d_array.handle); | ||
| 478 | set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); | ||
| 479 | set_view(ImageViewType::CubeArray, null_image_cube_array.handle); | ||
| 480 | set_view(ImageViewType::Rect, null_image_rect.handle); | ||
| 481 | } | ||
| 289 | 482 | ||
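The constructor front-loads one glGetInternalformativ sweep per target/format pair, so later questions like CanBeAccelerated become table lookups rather than per-draw driver queries. A sketch of the lookup side, with FormatInfo's signature inferred from its call sites in CanBeAccelerated above:

    // Hypothetical query against the cached per-target format properties.
    const FormatProperties& props = runtime.FormatInfo(ImageType::e2D, GL_RGBA8);
    if (!props.is_compressed && props.compatibility_by_size) {
        // Size-compatible reinterpretation (views/copies) is safe for this format.
    }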
| 290 | void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { | 483 | TextureCacheRuntime::~TextureCacheRuntime() = default; |
| 291 | MICROPROFILE_SCOPE(OpenGL_Texture_Download); | ||
| 292 | 484 | ||
| 293 | if (params.IsBuffer()) { | 485 | void TextureCacheRuntime::Finish() { |
| 294 | glGetNamedBufferSubData(texture_buffer.handle, 0, | 486 | glFinish(); |
| 295 | static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)), | 487 | } |
| 296 | staging_buffer.data()); | ||
| 297 | return; | ||
| 298 | } | ||
| 299 | 488 | ||
| 300 | SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); | 489 | ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { |
| 490 | return upload_buffers.RequestMap(size, true); | ||
| 491 | } | ||
| 301 | 492 | ||
| 302 | for (u32 level = 0; level < params.emulated_levels; ++level) { | 493 | ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { |
| 303 | glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); | 494 | return download_buffers.RequestMap(size, false); |
| 304 | glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); | 495 | } |
| 305 | const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); | ||
| 306 | 496 | ||
| 307 | u8* const mip_data = staging_buffer.data() + mip_offset; | 497 | void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, |
| 308 | const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); | 498 | std::span<const ImageCopy> copies) { |
| 309 | if (is_compressed) { | 499 | const GLuint dst_name = dst_image.Handle(); |
| 310 | glGetCompressedTextureImage(texture.handle, level, size, mip_data); | 500 | const GLuint src_name = src_image.Handle(); |
| 311 | } else { | 501 | const GLenum dst_target = ImageTarget(dst_image.info); |
| 312 | glGetTextureImage(texture.handle, level, format, type, size, mip_data); | 502 | const GLenum src_target = ImageTarget(src_image.info); |
| 313 | } | 503 | for (const ImageCopy& copy : copies) { |
| 504 | const auto src_origin = MakeCopyOrigin(copy.src_offset, copy.src_subresource, src_target); | ||
| 505 | const auto dst_origin = MakeCopyOrigin(copy.dst_offset, copy.dst_subresource, dst_target); | ||
| 506 | const auto region = MakeCopyRegion(copy.extent, copy.dst_subresource, dst_target); | ||
| 507 | glCopyImageSubData(src_name, src_target, src_origin.level, src_origin.x, src_origin.y, | ||
| 508 | src_origin.z, dst_name, dst_target, dst_origin.level, dst_origin.x, | ||
| 509 | dst_origin.y, dst_origin.z, region.width, region.height, region.depth); | ||
| 314 | } | 510 | } |
| 315 | } | 511 | } |
| 316 | 512 | ||
| 317 | void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { | 513 | bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { |
| 318 | MICROPROFILE_SCOPE(OpenGL_Texture_Upload); | 514 | if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { |
| 319 | SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); | 515 | return false; |
| 320 | for (u32 level = 0; level < params.emulated_levels; ++level) { | ||
| 321 | UploadTextureMipmap(level, staging_buffer); | ||
| 322 | } | 516 | } |
| 517 | return true; | ||
| 323 | } | 518 | } |
| 324 | 519 | ||
| 325 | void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) { | 520 | void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, |
| 326 | glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); | 521 | std::span<const ImageCopy> copies) { |
| 327 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); | 522 | if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { |
| 328 | 523 | ASSERT(src.info.type == ImageType::e3D); | |
| 329 | const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); | 524 | util_shaders.CopyBC4(dst, src, copies); |
| 330 | const u8* buffer{staging_buffer.data() + mip_offset}; | ||
| 331 | if (is_compressed) { | ||
| 332 | const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))}; | ||
| 333 | switch (params.target) { | ||
| 334 | case SurfaceTarget::Texture2D: | ||
| 335 | glCompressedTextureSubImage2D(texture.handle, level, 0, 0, | ||
| 336 | static_cast<GLsizei>(params.GetMipWidth(level)), | ||
| 337 | static_cast<GLsizei>(params.GetMipHeight(level)), | ||
| 338 | internal_format, image_size, buffer); | ||
| 339 | break; | ||
| 340 | case SurfaceTarget::Texture3D: | ||
| 341 | case SurfaceTarget::Texture2DArray: | ||
| 342 | case SurfaceTarget::TextureCubeArray: | ||
| 343 | glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0, | ||
| 344 | static_cast<GLsizei>(params.GetMipWidth(level)), | ||
| 345 | static_cast<GLsizei>(params.GetMipHeight(level)), | ||
| 346 | static_cast<GLsizei>(params.GetMipDepth(level)), | ||
| 347 | internal_format, image_size, buffer); | ||
| 348 | break; | ||
| 349 | case SurfaceTarget::TextureCubemap: { | ||
| 350 | const std::size_t host_layer_size{params.GetHostLayerSize(level)}; | ||
| 351 | for (std::size_t face = 0; face < params.depth; ++face) { | ||
| 352 | glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face), | ||
| 353 | static_cast<GLsizei>(params.GetMipWidth(level)), | ||
| 354 | static_cast<GLsizei>(params.GetMipHeight(level)), 1, | ||
| 355 | internal_format, | ||
| 356 | static_cast<GLsizei>(host_layer_size), buffer); | ||
| 357 | buffer += host_layer_size; | ||
| 358 | } | ||
| 359 | break; | ||
| 360 | } | ||
| 361 | default: | ||
| 362 | UNREACHABLE(); | ||
| 363 | } | ||
| 364 | } else { | 525 | } else { |
| 365 | switch (params.target) { | 526 | UNREACHABLE(); |
| 366 | case SurfaceTarget::Texture1D: | ||
| 367 | glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type, | ||
| 368 | buffer); | ||
| 369 | break; | ||
| 370 | case SurfaceTarget::TextureBuffer: | ||
| 371 | ASSERT(level == 0); | ||
| 372 | glNamedBufferSubData(texture_buffer.handle, 0, | ||
| 373 | params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer); | ||
| 374 | break; | ||
| 375 | case SurfaceTarget::Texture1DArray: | ||
| 376 | case SurfaceTarget::Texture2D: | ||
| 377 | glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level), | ||
| 378 | params.GetMipHeight(level), format, type, buffer); | ||
| 379 | break; | ||
| 380 | case SurfaceTarget::Texture3D: | ||
| 381 | case SurfaceTarget::Texture2DArray: | ||
| 382 | case SurfaceTarget::TextureCubeArray: | ||
| 383 | glTextureSubImage3D( | ||
| 384 | texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)), | ||
| 385 | static_cast<GLsizei>(params.GetMipHeight(level)), | ||
| 386 | static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer); | ||
| 387 | break; | ||
| 388 | case SurfaceTarget::TextureCubemap: | ||
| 389 | for (std::size_t face = 0; face < params.depth; ++face) { | ||
| 390 | glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face), | ||
| 391 | params.GetMipWidth(level), params.GetMipHeight(level), 1, | ||
| 392 | format, type, buffer); | ||
| 393 | buffer += params.GetHostLayerSize(level); | ||
| 394 | } | ||
| 395 | break; | ||
| 396 | default: | ||
| 397 | UNREACHABLE(); | ||
| 398 | } | ||
| 399 | } | 527 | } |
| 400 | } | 528 | } |
| 401 | 529 | ||
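Taken together, CanImageBeCopied and EmulateCopyImage split copies into a fast glCopyImageSubData path and a compute-shader fallback for 3D BC4 images. A minimal sketch of how a caller could combine them; the real dispatch lives in the shared VideoCommon::TextureCache, and this helper is hypothetical:

    #include <span>

    // Hypothetical dispatch helper; the actual call site is in VideoCommon.
    void DispatchImageCopy(TextureCacheRuntime& runtime, Image& dst, Image& src,
                           std::span<const VideoCommon::ImageCopy> copies) {
        if (runtime.CanImageBeCopied(dst, src)) {
            runtime.CopyImage(dst, src, copies); // one glCopyImageSubData per copy
        } else {
            runtime.EmulateCopyImage(dst, src, copies); // util_shaders BC4 path
        }
    }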
| 402 | void CachedSurface::DecorateSurfaceName() { | 530 | void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, |
| 403 | LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); | 531 | const std::array<Offset2D, 2>& dst_region, |
| 404 | } | 532 | const std::array<Offset2D, 2>& src_region, |
| 533 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 534 | Tegra::Engines::Fermi2D::Operation operation) { | ||
| 535 | state_tracker.NotifyScissor0(); | ||
| 536 | state_tracker.NotifyRasterizeEnable(); | ||
| 537 | state_tracker.NotifyFramebufferSRGB(); | ||
| 405 | 538 | ||
| 406 | void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) { | 539 | ASSERT(dst->BufferBits() == src->BufferBits()); |
| 407 | LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); | 540 | |
| 541 | glEnable(GL_FRAMEBUFFER_SRGB); | ||
| 542 | glDisable(GL_RASTERIZER_DISCARD); | ||
| 543 | glDisablei(GL_SCISSOR_TEST, 0); | ||
| 544 | |||
| 545 | const GLbitfield buffer_bits = dst->BufferBits(); | ||
| 546 | const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0; | ||
| 547 | const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear; | ||
| 548 | glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y, | ||
| 549 | src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y, | ||
| 550 | dst_region[1].x, dst_region[1].y, buffer_bits, | ||
| 551 | is_linear ? GL_LINEAR : GL_NEAREST); | ||
| 408 | } | 552 | } |
| 409 | 553 | ||
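The is_linear computation above encodes a core GL rule: glBlitFramebuffer accepts GL_LINEAR only when the mask is pure color; any depth or stencil bit requires GL_NEAREST. Restated as a small illustrative helper (not part of the change):

    // GL_LINEAR is valid only for color-only blits; depth/stencil forces nearest.
    GLenum BlitFilter(GLbitfield buffer_bits, bool wants_bilinear) {
        const bool color_only = (buffer_bits & ~GL_COLOR_BUFFER_BIT) == 0;
        return color_only && wants_bilinear ? GL_LINEAR : GL_NEAREST;
    }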
| 410 | View CachedSurface::CreateView(const ViewParams& view_key) { | 554 | void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, |
| 411 | return CreateViewInner(view_key, false); | 555 | size_t buffer_offset, |
| 556 | std::span<const SwizzleParameters> swizzles) { | ||
| 557 | switch (image.info.type) { | ||
| 558 | case ImageType::e2D: | ||
| 559 | return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); | ||
| 560 | case ImageType::e3D: | ||
| 561 | return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); | ||
| 562 | case ImageType::Linear: | ||
| 563 | return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); | ||
| 564 | default: | ||
| 565 | UNREACHABLE(); | ||
| 566 | break; | ||
| 567 | } | ||
| 412 | } | 568 | } |
| 413 | 569 | ||
| 414 | View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) { | 570 | void TextureCacheRuntime::InsertUploadMemoryBarrier() { |
| 415 | auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy); | 571 | glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); |
| 416 | views[view_key] = view; | ||
| 417 | if (!is_proxy) | ||
| 418 | view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); | ||
| 419 | return view; | ||
| 420 | } | 572 | } |
| 421 | 573 | ||
| 422 | CachedSurfaceView::CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, | 574 | FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal_format) const { |
| 423 | bool is_proxy_) | 575 | switch (type) { |
| 424 | : ViewBase{params_}, surface{surface_}, format{surface_.internal_format}, | 576 | case ImageType::e1D: |
| 425 | target{GetTextureTarget(params_.target)}, is_proxy{is_proxy_} { | 577 | return format_properties[0].at(internal_format); |
| 426 | if (!is_proxy_) { | 578 | case ImageType::e2D: |
| 427 | main_view = CreateTextureView(); | 579 | case ImageType::Linear: |
| 580 | return format_properties[1].at(internal_format); | ||
| 581 | case ImageType::e3D: | ||
| 582 | return format_properties[2].at(internal_format); | ||
| 583 | default: | ||
| 584 | UNREACHABLE(); | ||
| 585 | return FormatProperties{}; | ||
| 428 | } | 586 | } |
| 429 | } | 587 | } |
| 430 | 588 | ||
| 431 | CachedSurfaceView::~CachedSurfaceView() = default; | 589 | TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) |
| 590 | : storage_flags{storage_flags_}, map_flags{map_flags_} {} | ||
| 432 | 591 | ||
| 433 | void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const { | 592 | TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default; |
| 434 | ASSERT(params.num_levels == 1); | ||
| 435 | 593 | ||
| 436 | if (params.target == SurfaceTarget::Texture3D) { | 594 | ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size, |
| 437 | if (params.num_layers > 1) { | 595 | bool insert_fence) { |
| 438 | ASSERT(params.base_layer == 0); | 596 | const size_t index = RequestBuffer(requested_size); |
| 439 | glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level); | 597 | OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; |
| 440 | } else { | 598 | return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync); |
| 441 | glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle, | 599 | } |
| 442 | params.base_level, params.base_layer); | 600 | |
| 443 | } | 601 | size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { |
| 444 | return; | 602 | if (const std::optional<size_t> index = FindBuffer(requested_size); index) { |
| 603 | return *index; | ||
| 445 | } | 604 | } |
| 446 | 605 | ||
| 447 | if (params.num_layers > 1) { | 606 | OGLBuffer& buffer = buffers.emplace_back(); |
| 448 | UNIMPLEMENTED_IF(params.base_layer != 0); | 607 | buffer.Create(); |
| 449 | glFramebufferTexture(fb_target, attachment, GetTexture(), 0); | 608 | glNamedBufferStorage(buffer.handle, requested_size, nullptr, |
| 450 | return; | 609 | storage_flags | GL_MAP_PERSISTENT_BIT); |
| 610 | maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size, | ||
| 611 | map_flags | GL_MAP_PERSISTENT_BIT))); | ||
| 612 | |||
| 613 | syncs.emplace_back(); | ||
| 614 | sizes.push_back(requested_size); | ||
| 615 | |||
| 616 | ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && | ||
| 617 | maps.size() == sizes.size()); | ||
| 618 | |||
| 619 | return buffers.size() - 1; | ||
| 620 | } | ||
| 621 | |||
| 622 | std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) { | ||
| 623 | size_t smallest_buffer = std::numeric_limits<size_t>::max(); | ||
| 624 | std::optional<size_t> found; | ||
| 625 | const size_t num_buffers = sizes.size(); | ||
| 626 | for (size_t index = 0; index < num_buffers; ++index) { | ||
| 627 | const size_t buffer_size = sizes[index]; | ||
| 628 | if (buffer_size < requested_size || buffer_size >= smallest_buffer) { | ||
| 629 | continue; | ||
| 630 | } | ||
| 631 | if (syncs[index].handle != 0) { | ||
| 632 | GLint status; | ||
| 633 | glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status); | ||
| 634 | if (status != GL_SIGNALED) { | ||
| 635 | continue; | ||
| 636 | } | ||
| 637 | syncs[index].Release(); | ||
| 638 | } | ||
| 639 | smallest_buffer = buffer_size; | ||
| 640 | found = index; | ||
| 451 | } | 641 | } |
| 642 | return found; | ||
| 643 | } | ||
| 452 | 644 | ||
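FindBuffer is a best-fit scan guarded by fence syncs: a previously fenced staging buffer becomes reusable only once its fence has signaled. The same check in isolation, as a standalone sketch over plain GL sync objects:

    #include <glad/glad.h>

    // A fenced staging buffer may be recycled only after its sync has signaled.
    bool IsStagingBufferFree(GLsync sync) {
        if (!sync) {
            return true; // never handed to the GPU yet
        }
        GLint status = GL_UNSIGNALED;
        glGetSynciv(sync, GL_SYNC_STATUS, 1, nullptr, &status);
        return status == GL_SIGNALED;
    }

    // The fence is recorded after submitting work that touches the buffer:
    // GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);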
| 453 | const GLenum view_target = surface.GetTarget(); | 645 | Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, |
| 454 | const GLuint texture = surface.GetTexture(); | 646 | VAddr cpu_addr_) |
| 455 | switch (surface.GetSurfaceParams().target) { | 647 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) { |
| 456 | case SurfaceTarget::Texture1D: | 648 | if (CanBeAccelerated(runtime, info)) { |
| 457 | glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level); | 649 | flags |= ImageFlagBits::AcceleratedUpload; |
| 650 | } | ||
| 651 | if (IsConverted(runtime.device, info.format, info.type)) { | ||
| 652 | flags |= ImageFlagBits::Converted; | ||
| 653 | gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; | ||
| 654 | gl_store_format = GL_RGBA8; | ||
| 655 | gl_format = GL_RGBA; | ||
| 656 | gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; | ||
| 657 | } else { | ||
| 658 | const auto& tuple = GetFormatTuple(info.format); | ||
| 659 | gl_internal_format = tuple.internal_format; | ||
| 660 | gl_store_format = tuple.store_format; | ||
| 661 | gl_format = tuple.format; | ||
| 662 | gl_type = tuple.type; | ||
| 663 | } | ||
| 664 | const GLenum target = ImageTarget(info); | ||
| 665 | const GLsizei width = info.size.width; | ||
| 666 | const GLsizei height = info.size.height; | ||
| 667 | const GLsizei depth = info.size.depth; | ||
| 668 | const int max_host_mip_levels = std::bit_width(info.size.width); | ||
| 669 | const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); | ||
| 670 | const GLsizei num_layers = info.resources.layers; | ||
| 671 | const GLsizei num_samples = info.num_samples; | ||
| 672 | |||
| 673 | GLuint handle = 0; | ||
| 674 | if (target != GL_TEXTURE_BUFFER) { | ||
| 675 | texture.Create(target); | ||
| 676 | handle = texture.handle; | ||
| 677 | } | ||
| 678 | switch (target) { | ||
| 679 | case GL_TEXTURE_1D_ARRAY: | ||
| 680 | glTextureStorage2D(handle, num_levels, gl_store_format, width, num_layers); | ||
| 458 | break; | 681 | break; |
| 459 | case SurfaceTarget::Texture2D: | 682 | case GL_TEXTURE_2D_ARRAY: |
| 460 | glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level); | 683 | glTextureStorage3D(handle, num_levels, gl_store_format, width, height, num_layers); |
| 461 | break; | 684 | break; |
| 462 | case SurfaceTarget::Texture1DArray: | 685 | case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { |
| 463 | case SurfaceTarget::Texture2DArray: | 686 | // TODO: Where should 'fixedsamplelocations' come from? |
| 464 | case SurfaceTarget::TextureCubemap: | 687 | const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); |
| 465 | case SurfaceTarget::TextureCubeArray: | 688 | glTextureStorage3DMultisample(handle, num_samples, gl_store_format, width >> samples_x, |
| 466 | glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level, | 689 | height >> samples_y, num_layers, GL_FALSE); |
| 467 | params.base_layer); | 690 | break; |
| 691 | } | ||
| 692 | case GL_TEXTURE_RECTANGLE: | ||
| 693 | glTextureStorage2D(handle, num_levels, gl_store_format, width, height); | ||
| 694 | break; | ||
| 695 | case GL_TEXTURE_3D: | ||
| 696 | glTextureStorage3D(handle, num_levels, gl_store_format, width, height, depth); | ||
| 697 | break; | ||
| 698 | case GL_TEXTURE_BUFFER: | ||
| 699 | buffer.Create(); | ||
| 700 | glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); | ||
| 468 | break; | 701 | break; |
| 469 | default: | 702 | default: |
| 470 | UNIMPLEMENTED(); | 703 | UNREACHABLE_MSG("Invalid target=0x{:x}", target); |
| 704 | break; | ||
| 705 | } | ||
| 706 | if (runtime.device.HasDebuggingToolAttached()) { | ||
| 707 | const std::string name = VideoCommon::Name(*this); | ||
| 708 | glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle, | ||
| 709 | static_cast<GLsizei>(name.size()), name.data()); | ||
| 471 | } | 710 | } |
| 472 | } | 711 | } |
| 473 | 712 | ||
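The std::bit_width clamp in the constructor above caps the level count at the length of the host mip chain, since a guest image can declare more levels than its width supports. A worked example of the same arithmetic:

    #include <algorithm>
    #include <bit>

    // A 1024-texel-wide image has the mip chain 1024, 512, ..., 1: 11 levels,
    // and std::bit_width(1024u) == 11, so excess guest levels are clamped.
    static_assert(std::bit_width(1024u) == 11);

    constexpr int ClampedLevels(int guest_levels, unsigned width) {
        return std::min(guest_levels, static_cast<int>(std::bit_width(width)));
    }
    static_assert(ClampedLevels(13, 1024) == 11);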
| 474 | GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source, | 713 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 475 | SwizzleSource z_source, SwizzleSource w_source) { | 714 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 476 | if (GetSurfaceParams().IsBuffer()) { | 715 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle()); |
| 477 | return GetTexture(); | 716 | glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); |
| 478 | } | ||
| 479 | const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); | ||
| 480 | if (current_swizzle == new_swizzle) { | ||
| 481 | return current_view; | ||
| 482 | } | ||
| 483 | current_swizzle = new_swizzle; | ||
| 484 | 717 | ||
| 485 | const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); | 718 | glPixelStorei(GL_UNPACK_ALIGNMENT, 1); |
| 486 | OGLTextureView& view = entry->second; | ||
| 487 | if (!is_cache_miss) { | ||
| 488 | current_view = view.handle; | ||
| 489 | return view.handle; | ||
| 490 | } | ||
| 491 | view = CreateTextureView(); | ||
| 492 | current_view = view.handle; | ||
| 493 | 719 | ||
| 494 | std::array swizzle{x_source, y_source, z_source, w_source}; | 720 | u32 current_row_length = std::numeric_limits<u32>::max(); |
| 721 | u32 current_image_height = std::numeric_limits<u32>::max(); | ||
| 495 | 722 | ||
| 496 | switch (const PixelFormat pixel_format = GetSurfaceParams().pixel_format) { | 723 | for (const VideoCommon::BufferImageCopy& copy : copies) { |
| 497 | case PixelFormat::D24_UNORM_S8_UINT: | 724 | if (current_row_length != copy.buffer_row_length) { |
| 498 | case PixelFormat::D32_FLOAT_S8_UINT: | 725 | current_row_length = copy.buffer_row_length; |
| 499 | case PixelFormat::S8_UINT_D24_UNORM: | 726 | glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length); |
| 500 | UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); | 727 | } |
| 501 | glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, | 728 | if (current_image_height != copy.buffer_image_height) { |
| 502 | GetComponent(pixel_format, x_source == SwizzleSource::R)); | 729 | current_image_height = copy.buffer_image_height; |
| 503 | 730 | glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); | |
| 504 | // Make sure we sample the first component | 731 | } |
| 505 | std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) { | 732 | CopyBufferToImage(copy, buffer_offset); |
| 506 | return value == SwizzleSource::G ? SwizzleSource::R : value; | ||
| 507 | }); | ||
| 508 | [[fallthrough]]; | ||
| 509 | default: { | ||
| 510 | const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]), | ||
| 511 | GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])}; | ||
| 512 | glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); | ||
| 513 | break; | ||
| 514 | } | ||
| 515 | } | 733 | } |
| 516 | return view.handle; | ||
| 517 | } | 734 | } |
| 518 | 735 | ||
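Caching GL_UNPACK_ROW_LENGTH and GL_UNPACK_IMAGE_HEIGHT across the copy loop avoids redundant pixel-store changes, since both are global state consumed by every unpacking call. One strided upload written out in full, with hypothetical parameter names standing in for BufferImageCopy fields:

    #include <cstdint>
    #include <glad/glad.h>

    // One 2D-array upload from a PBO bound to GL_PIXEL_UNPACK_BUFFER.
    void StridedUpload(GLuint texture, GLint level, GLint x, GLint y, GLint layer,
                       GLsizei width, GLsizei height, GLsizei layers,
                       GLsizei row_length, GLsizei image_height,
                       GLenum format, GLenum type, std::uintptr_t pbo_offset) {
        glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
        glPixelStorei(GL_UNPACK_ROW_LENGTH, row_length);     // texels per source row
        glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, image_height); // rows per source slice
        glTextureSubImage3D(texture, level, x, y, layer, width, height, layers,
                            format, type, reinterpret_cast<const void*>(pbo_offset));
    }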
| 519 | OGLTextureView CachedSurfaceView::CreateTextureView() const { | 736 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 520 | OGLTextureView texture_view; | 737 | std::span<const VideoCommon::BufferCopy> copies) { |
| 521 | texture_view.Create(); | 738 | for (const VideoCommon::BufferCopy& copy : copies) { |
| 522 | 739 | glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset, | |
| 523 | if (target == GL_TEXTURE_3D) { | 740 | copy.dst_offset, copy.size); |
| 524 | glTextureView(texture_view.handle, target, surface.texture.handle, format, | ||
| 525 | params.base_level, params.num_levels, 0, 1); | ||
| 526 | } else { | ||
| 527 | glTextureView(texture_view.handle, target, surface.texture.handle, format, | ||
| 528 | params.base_level, params.num_levels, params.base_layer, params.num_layers); | ||
| 529 | } | 741 | } |
| 530 | ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); | ||
| 531 | |||
| 532 | return texture_view; | ||
| 533 | } | 742 | } |
| 534 | 743 | ||
| 535 | TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_, | 744 | void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, |
| 536 | Tegra::Engines::Maxwell3D& maxwell3d_, | 745 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 537 | Tegra::MemoryManager& gpu_memory_, const Device& device_, | 746 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API |
| 538 | StateTracker& state_tracker_) | ||
| 539 | : TextureCacheBase{rasterizer_, maxwell3d_, gpu_memory_, device_.HasASTC()}, | ||
| 540 | state_tracker{state_tracker_} { | ||
| 541 | src_framebuffer.Create(); | ||
| 542 | dst_framebuffer.Create(); | ||
| 543 | } | ||
| 544 | 747 | ||
| 545 | TextureCacheOpenGL::~TextureCacheOpenGL() = default; | 748 | glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle()); |
| 749 | glPixelStorei(GL_PACK_ALIGNMENT, 1); | ||
| 546 | 750 | ||
| 547 | Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { | 751 | u32 current_row_length = std::numeric_limits<u32>::max(); |
| 548 | return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported); | 752 | u32 current_image_height = std::numeric_limits<u32>::max(); |
| 549 | } | ||
| 550 | 753 | ||
| 551 | void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, | 754 | for (const VideoCommon::BufferImageCopy& copy : copies) { |
| 552 | const VideoCommon::CopyParams& copy_params) { | 755 | if (current_row_length != copy.buffer_row_length) { |
| 553 | const auto& src_params = src_surface->GetSurfaceParams(); | 756 | current_row_length = copy.buffer_row_length; |
| 554 | const auto& dst_params = dst_surface->GetSurfaceParams(); | 757 | glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); |
| 555 | if (src_params.type != dst_params.type) { | 758 | } |
| 556 | // A fallback is needed | 759 | if (current_image_height != copy.buffer_image_height) { |
| 557 | return; | 760 | current_image_height = copy.buffer_image_height; |
| 761 | glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); | ||
| 762 | } | ||
| 763 | CopyImageToBuffer(copy, buffer_offset); | ||
| 558 | } | 764 | } |
| 559 | const auto src_handle = src_surface->GetTexture(); | ||
| 560 | const auto src_target = src_surface->GetTarget(); | ||
| 561 | const auto dst_handle = dst_surface->GetTexture(); | ||
| 562 | const auto dst_target = dst_surface->GetTarget(); | ||
| 563 | glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x, | ||
| 564 | copy_params.source_y, copy_params.source_z, dst_handle, dst_target, | ||
| 565 | copy_params.dest_level, copy_params.dest_x, copy_params.dest_y, | ||
| 566 | copy_params.dest_z, copy_params.width, copy_params.height, | ||
| 567 | copy_params.depth); | ||
| 568 | } | 765 | } |
| 569 | 766 | ||
| 570 | void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | 767 | void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { |
| 571 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 768 | // Compressed formats don't have a pixel format or type |
| 572 | const auto& src_params{src_view->GetSurfaceParams()}; | 769 | const bool is_compressed = gl_format == GL_NONE; |
| 573 | const auto& dst_params{dst_view->GetSurfaceParams()}; | 770 | const void* const offset = reinterpret_cast<const void*>(copy.buffer_offset + buffer_offset); |
| 574 | UNIMPLEMENTED_IF(src_params.depth != 1); | ||
| 575 | UNIMPLEMENTED_IF(dst_params.depth != 1); | ||
| 576 | |||
| 577 | state_tracker.NotifyScissor0(); | ||
| 578 | state_tracker.NotifyFramebuffer(); | ||
| 579 | state_tracker.NotifyRasterizeEnable(); | ||
| 580 | state_tracker.NotifyFramebufferSRGB(); | ||
| 581 | 771 | ||
| 582 | if (dst_params.srgb_conversion) { | 772 | switch (info.type) { |
| 583 | glEnable(GL_FRAMEBUFFER_SRGB); | 773 | case ImageType::e1D: |
| 584 | } else { | 774 | if (is_compressed) { |
| 585 | glDisable(GL_FRAMEBUFFER_SRGB); | 775 | glCompressedTextureSubImage2D(texture.handle, copy.image_subresource.base_level, |
| 776 | copy.image_offset.x, copy.image_subresource.base_layer, | ||
| 777 | copy.image_extent.width, | ||
| 778 | copy.image_subresource.num_layers, gl_internal_format, | ||
| 779 | static_cast<GLsizei>(copy.buffer_size), offset); | ||
| 780 | } else { | ||
| 781 | glTextureSubImage2D(texture.handle, copy.image_subresource.base_level, | ||
| 782 | copy.image_offset.x, copy.image_subresource.base_layer, | ||
| 783 | copy.image_extent.width, copy.image_subresource.num_layers, | ||
| 784 | gl_format, gl_type, offset); | ||
| 785 | } | ||
| 786 | break; | ||
| 787 | case ImageType::e2D: | ||
| 788 | case ImageType::Linear: | ||
| 789 | if (is_compressed) { | ||
| 790 | glCompressedTextureSubImage3D( | ||
| 791 | texture.handle, copy.image_subresource.base_level, copy.image_offset.x, | ||
| 792 | copy.image_offset.y, copy.image_subresource.base_layer, copy.image_extent.width, | ||
| 793 | copy.image_extent.height, copy.image_subresource.num_layers, gl_internal_format, | ||
| 794 | static_cast<GLsizei>(copy.buffer_size), offset); | ||
| 795 | } else { | ||
| 796 | glTextureSubImage3D(texture.handle, copy.image_subresource.base_level, | ||
| 797 | copy.image_offset.x, copy.image_offset.y, | ||
| 798 | copy.image_subresource.base_layer, copy.image_extent.width, | ||
| 799 | copy.image_extent.height, copy.image_subresource.num_layers, | ||
| 800 | gl_format, gl_type, offset); | ||
| 801 | } | ||
| 802 | break; | ||
| 803 | case ImageType::e3D: | ||
| 804 | if (is_compressed) { | ||
| 805 | glCompressedTextureSubImage3D( | ||
| 806 | texture.handle, copy.image_subresource.base_level, copy.image_offset.x, | ||
| 807 | copy.image_offset.y, copy.image_offset.z, copy.image_extent.width, | ||
| 808 | copy.image_extent.height, copy.image_extent.depth, gl_internal_format, | ||
| 809 | static_cast<GLsizei>(copy.buffer_size), offset); | ||
| 810 | } else { | ||
| 811 | glTextureSubImage3D(texture.handle, copy.image_subresource.base_level, | ||
| 812 | copy.image_offset.x, copy.image_offset.y, copy.image_offset.z, | ||
| 813 | copy.image_extent.width, copy.image_extent.height, | ||
| 814 | copy.image_extent.depth, gl_format, gl_type, offset); | ||
| 815 | } | ||
| 816 | break; | ||
| 817 | default: | ||
| 818 | UNREACHABLE(); | ||
| 586 | } | 819 | } |
| 587 | glDisable(GL_RASTERIZER_DISCARD); | 820 | } |
| 588 | glDisablei(GL_SCISSOR_TEST, 0); | ||
| 589 | |||
| 590 | glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle); | ||
| 591 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle); | ||
| 592 | |||
| 593 | GLenum buffers = 0; | ||
| 594 | if (src_params.type == SurfaceType::ColorTexture) { | ||
| 595 | src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); | ||
| 596 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 597 | 0); | ||
| 598 | |||
| 599 | dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); | ||
| 600 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 601 | 0); | ||
| 602 | |||
| 603 | buffers = GL_COLOR_BUFFER_BIT; | ||
| 604 | } else if (src_params.type == SurfaceType::Depth) { | ||
| 605 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 606 | src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER); | ||
| 607 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 608 | 821 | ||
| 609 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | 822 | void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { |
| 610 | dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | 823 | const GLint x_offset = copy.image_offset.x; |
| 611 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | 824 | const GLsizei width = copy.image_extent.width; |
| 612 | 825 | ||
| 613 | buffers = GL_DEPTH_BUFFER_BIT; | 826 | const GLint level = copy.image_subresource.base_level; |
| 614 | } else if (src_params.type == SurfaceType::DepthStencil) { | 827 | const GLsizei buffer_size = static_cast<GLsizei>(copy.buffer_size); |
| 615 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | 828 | void* const offset = reinterpret_cast<void*>(copy.buffer_offset + buffer_offset); |
| 616 | src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER); | ||
| 617 | 829 | ||
| 618 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | 830 | GLint y_offset = 0; |
| 619 | dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | 831 | GLint z_offset = 0; |
| 832 | GLsizei height = 1; | ||
| 833 | GLsizei depth = 1; | ||
| 620 | 834 | ||
| 621 | buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; | 835 | switch (info.type) { |
| 836 | case ImageType::e1D: | ||
| 837 | y_offset = copy.image_subresource.base_layer; | ||
| 838 | height = copy.image_subresource.num_layers; | ||
| 839 | break; | ||
| 840 | case ImageType::e2D: | ||
| 841 | case ImageType::Linear: | ||
| 842 | y_offset = copy.image_offset.y; | ||
| 843 | z_offset = copy.image_subresource.base_layer; | ||
| 844 | height = copy.image_extent.height; | ||
| 845 | depth = copy.image_subresource.num_layers; | ||
| 846 | break; | ||
| 847 | case ImageType::e3D: | ||
| 848 | y_offset = copy.image_offset.y; | ||
| 849 | z_offset = copy.image_offset.z; | ||
| 850 | height = copy.image_extent.height; | ||
| 851 | depth = copy.image_extent.depth; | ||
| 852 | break; | ||
| 853 | default: | ||
| 854 | UNREACHABLE(); | ||
| 855 | } | ||
| 856 | // Compressed formats don't have a pixel format or type | ||
| 857 | const bool is_compressed = gl_format == GL_NONE; | ||
| 858 | if (is_compressed) { | ||
| 859 | glGetCompressedTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, | ||
| 860 | height, depth, buffer_size, offset); | ||
| 861 | } else { | ||
| 862 | glGetTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, height, | ||
| 863 | depth, gl_format, gl_type, buffer_size, offset); | ||
| 622 | } | 864 | } |
| 623 | |||
| 624 | const Common::Rectangle<u32>& src_rect = copy_config.src_rect; | ||
| 625 | const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect; | ||
| 626 | const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; | ||
| 627 | |||
| 628 | glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top), | ||
| 629 | static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom), | ||
| 630 | static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top), | ||
| 631 | static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom), | ||
| 632 | buffers, | ||
| 633 | is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST); | ||
| 634 | } | 865 | } |
| 635 | 866 | ||
| 636 | void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { | 867 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, |
| 637 | MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); | 868 | ImageId image_id_, Image& image) |
| 638 | const auto& src_params = src_surface->GetSurfaceParams(); | 869 | : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { |
| 639 | const auto& dst_params = dst_surface->GetSurfaceParams(); | 870 | const Device& device = runtime.device; |
| 640 | UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); | 871 | if (True(image.flags & ImageFlagBits::Converted)) { |
| 872 | internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; | ||
| 873 | } else { | ||
| 874 | internal_format = GetFormatTuple(format).internal_format; | ||
| 875 | } | ||
| 876 | VideoCommon::SubresourceRange flatten_range = info.range; | ||
| 877 | std::array<GLuint, 2> handles; | ||
| 878 | stored_views.reserve(2); | ||
| 641 | 879 | ||
| 642 | const auto source_format = GetFormatTuple(src_params.pixel_format); | 880 | switch (info.type) { |
| 643 | const auto dest_format = GetFormatTuple(dst_params.pixel_format); | 881 | case ImageViewType::e1DArray: |
| 882 | flatten_range.extent.layers = 1; | ||
| 883 | [[fallthrough]]; | ||
| 884 | case ImageViewType::e1D: | ||
| 885 | glGenTextures(2, handles.data()); | ||
| 886 | SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); | ||
| 887 | SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); | ||
| 888 | break; | ||
| 889 | case ImageViewType::e2DArray: | ||
| 890 | flatten_range.extent.layers = 1; | ||
| 891 | [[fallthrough]]; | ||
| 892 | case ImageViewType::e2D: | ||
| 893 | if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { | ||
| 894 | // 2D and 2D array views on a 3D texture are used exclusively for render targets | ||
| 895 | ASSERT(info.range.extent.levels == 1); | ||
| 896 | const VideoCommon::SubresourceRange slice_range{ | ||
| 897 | .base = {.level = info.range.base.level, .layer = 0}, | ||
| 898 | .extent = {.levels = 1, .layers = 1}, | ||
| 899 | }; | ||
| 900 | glGenTextures(1, handles.data()); | ||
| 901 | SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); | ||
| 902 | break; | ||
| 903 | } | ||
| 904 | glGenTextures(2, handles.data()); | ||
| 905 | SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); | ||
| 906 | SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); | ||
| 907 | break; | ||
| 908 | case ImageViewType::e3D: | ||
| 909 | glGenTextures(1, handles.data()); | ||
| 910 | SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); | ||
| 911 | break; | ||
| 912 | case ImageViewType::CubeArray: | ||
| 913 | flatten_range.extent.layers = 6; | ||
| 914 | [[fallthrough]]; | ||
| 915 | case ImageViewType::Cube: | ||
| 916 | glGenTextures(2, handles.data()); | ||
| 917 | SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); | ||
| 918 | SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); | ||
| 919 | break; | ||
| 920 | case ImageViewType::Rect: | ||
| 921 | glGenTextures(1, handles.data()); | ||
| 922 | SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); | ||
| 923 | break; | ||
| 924 | case ImageViewType::Buffer: | ||
| 925 | glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); | ||
| 926 | SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); | ||
| 927 | break; | ||
| 928 | } | ||
| 929 | default_handle = Handle(info.type); | ||
| 930 | } | ||
| 644 | 931 | ||
| 645 | const std::size_t source_size = src_surface->GetHostSizeInBytes(); | 932 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) |
| 646 | const std::size_t dest_size = dst_surface->GetHostSizeInBytes(); | 933 | : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} |
| 647 | 934 | ||
| 648 | const std::size_t buffer_size = std::max(source_size, dest_size); | 935 | void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, |
| 936 | GLuint handle, const VideoCommon::ImageViewInfo& info, | ||
| 937 | VideoCommon::SubresourceRange view_range) { | ||
| 938 | if (info.type == ImageViewType::Buffer) { | ||
| 939 | // TODO: Take offset from buffer cache | ||
| 940 | glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, | ||
| 941 | image.guest_size_bytes); | ||
| 942 | } else { | ||
| 943 | const GLuint parent = image.texture.handle; | ||
| 944 | const GLenum target = ImageTarget(view_type, image.info.num_samples); | ||
| 945 | glTextureView(handle, target, parent, internal_format, view_range.base.level, | ||
| 946 | view_range.extent.levels, view_range.base.layer, view_range.extent.layers); | ||
| 947 | if (!info.IsRenderTarget()) { | ||
| 948 | ApplySwizzle(handle, format, info.Swizzle()); | ||
| 949 | } | ||
| 950 | } | ||
| 951 | if (device.HasDebuggingToolAttached()) { | ||
| 952 | const std::string name = VideoCommon::Name(*this, view_type); | ||
| 953 | glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); | ||
| 954 | } | ||
| 955 | stored_views.emplace_back().handle = handle; | ||
| 956 | views[static_cast<size_t>(view_type)] = handle; | ||
| 957 | } | ||
| 649 | 958 | ||
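Because SetupView records each created view into views[] keyed by ImageViewType, binding code can request whichever variant matches the shader's sampler declaration and fall back to the guest-declared default. A hypothetical call site:

    // Hypothetical binding helper: pick the layered or flattened GL view.
    void BindImageView(GLuint unit, const ImageView& view, bool shader_wants_array) {
        const GLuint handle = shader_wants_array
                                  ? view.Handle(ImageViewType::e2DArray)
                                  : view.DefaultHandle();
        glBindTextureUnit(unit, handle);
    }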
| 650 | GLuint copy_pbo_handle = FetchPBO(buffer_size); | 959 | Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { |
| 960 | const GLenum compare_mode = config.depth_compare_enabled ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE; | ||
| 961 | const GLenum compare_func = MaxwellToGL::DepthCompareFunc(config.depth_compare_func); | ||
| 962 | const GLenum mag = MaxwellToGL::TextureFilterMode(config.mag_filter, TextureMipmapFilter::None); | ||
| 963 | const GLenum min = MaxwellToGL::TextureFilterMode(config.min_filter, config.mipmap_filter); | ||
| 964 | const GLenum reduction_filter = MaxwellToGL::ReductionFilter(config.reduction_filter); | ||
| 965 | const GLint seamless = config.cubemap_interface_filtering ? GL_TRUE : GL_FALSE; | ||
| 966 | |||
| 967 | UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1); | ||
| 968 | UNIMPLEMENTED_IF(config.float_coord_normalization != 0); | ||
| 969 | |||
| 970 | sampler.Create(); | ||
| 971 | const GLuint handle = sampler.handle; | ||
| 972 | glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u)); | ||
| 973 | glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v)); | ||
| 974 | glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p)); | ||
| 975 | glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode); | ||
| 976 | glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func); | ||
| 977 | glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag); | ||
| 978 | glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min); | ||
| 979 | glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias()); | ||
| 980 | glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod()); | ||
| 981 | glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod()); | ||
| 982 | glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); | ||
| 983 | |||
| 984 | if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { | ||
| 985 | glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy()); | ||
| 986 | } else { | ||
| 987 | LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); | ||
| 988 | } | ||
| 989 | if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) { | ||
| 990 | glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter); | ||
| 991 | } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) { | ||
| 992 | LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required"); | ||
| 993 | } | ||
| 994 | if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) { | ||
| 995 | glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless); | ||
| 996 | } else if (seamless == GL_FALSE) { | ||
| 997 | // We default to false because it's more common | ||
| 998 | LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required"); | ||
| 999 | } | ||
| 1000 | } | ||
| 651 | 1001 | ||
| 652 | glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); | 1002 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, |
| 1003 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { | ||
| 1004 | // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of | ||
| 1005 | // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared | ||
| 1006 | // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with | ||
| 1007 | // mismatched sizes, which is why core framebuffers are preferred. | ||
| 1008 | GLuint handle; | ||
| 1009 | glGenFramebuffers(1, &handle); | ||
| 1010 | glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); | ||
| 1011 | |||
| 1012 | GLsizei num_buffers = 0; | ||
| 1013 | std::array<GLenum, NUM_RT> gl_draw_buffers; | ||
| 1014 | gl_draw_buffers.fill(GL_NONE); | ||
| 1015 | |||
| 1016 | for (size_t index = 0; index < color_buffers.size(); ++index) { | ||
| 1017 | const ImageView* const image_view = color_buffers[index]; | ||
| 1018 | if (!image_view) { | ||
| 1019 | continue; | ||
| 1020 | } | ||
| 1021 | buffer_bits |= GL_COLOR_BUFFER_BIT; | ||
| 1022 | gl_draw_buffers[index] = GL_COLOR_ATTACHMENT0 + key.draw_buffers[index]; | ||
| 1023 | num_buffers = static_cast<GLsizei>(index + 1); | ||
| 653 | 1024 | ||
| 654 | if (src_surface->IsCompressed()) { | 1025 | const GLenum attachment = static_cast<GLenum>(GL_COLOR_ATTACHMENT0 + index); |
| 655 | glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size), | 1026 | AttachTexture(handle, attachment, image_view); |
| 656 | nullptr); | ||
| 657 | } else { | ||
| 658 | glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type, | ||
| 659 | static_cast<GLsizei>(source_size), nullptr); | ||
| 660 | } | 1027 | } |
| 661 | glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); | ||
| 662 | 1028 | ||
| 663 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); | 1029 | if (const ImageView* const image_view = depth_buffer; image_view) { |
| 1030 | if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) { | ||
| 1031 | buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; | ||
| 1032 | } else { | ||
| 1033 | buffer_bits |= GL_DEPTH_BUFFER_BIT; | ||
| 1034 | } | ||
| 1035 | const GLenum attachment = AttachmentType(image_view->format); | ||
| 1036 | AttachTexture(handle, attachment, image_view); | ||
| 1037 | } | ||
| 664 | 1038 | ||
| 665 | const GLsizei width = static_cast<GLsizei>(dst_params.width); | 1039 | if (num_buffers > 1) { |
| 666 | const GLsizei height = static_cast<GLsizei>(dst_params.height); | 1040 | glNamedFramebufferDrawBuffers(handle, num_buffers, gl_draw_buffers.data()); |
| 667 | const GLsizei depth = static_cast<GLsizei>(dst_params.depth); | 1041 | } else if (num_buffers > 0) { |
| 668 | if (dst_surface->IsCompressed()) { | 1042 | glNamedFramebufferDrawBuffer(handle, gl_draw_buffers[0]); |
| 669 | LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); | ||
| 670 | UNREACHABLE(); | ||
| 671 | } else { | 1043 | } else { |
| 672 | switch (dst_params.target) { | 1044 | glNamedFramebufferDrawBuffer(handle, GL_NONE); |
| 673 | case SurfaceTarget::Texture1D: | ||
| 674 | glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format, | ||
| 675 | dest_format.type, nullptr); | ||
| 676 | break; | ||
| 677 | case SurfaceTarget::Texture2D: | ||
| 678 | glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height, | ||
| 679 | dest_format.format, dest_format.type, nullptr); | ||
| 680 | break; | ||
| 681 | case SurfaceTarget::Texture3D: | ||
| 682 | case SurfaceTarget::Texture2DArray: | ||
| 683 | case SurfaceTarget::TextureCubeArray: | ||
| 684 | glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, | ||
| 685 | dest_format.format, dest_format.type, nullptr); | ||
| 686 | break; | ||
| 687 | case SurfaceTarget::TextureCubemap: | ||
| 688 | glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, | ||
| 689 | dest_format.format, dest_format.type, nullptr); | ||
| 690 | break; | ||
| 691 | default: | ||
| 692 | LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", dst_params.target); | ||
| 693 | UNREACHABLE(); | ||
| 694 | } | ||
| 695 | } | 1045 | } |
| 696 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); | ||
| 697 | 1046 | ||
| 698 | glTextureBarrier(); | 1047 | glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_WIDTH, key.size.width); |
| 699 | } | 1048 | glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_HEIGHT, key.size.height); |
| 1049 | // TODO | ||
| 1050 | // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_LAYERS, ...); | ||
| 1051 | // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_SAMPLES, ...); | ||
| 1052 | // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS, ...); | ||
| 700 | 1053 | ||
| 701 | GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { | 1054 | if (runtime.device.HasDebuggingToolAttached()) { |
| 702 | ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; }); | 1055 | const std::string name = VideoCommon::Name(key); |
| 703 | const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size)); | 1056 | glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data()); |
| 704 | OGLBuffer& cp = copy_pbo_cache[l2]; | ||
| 705 | if (cp.handle == 0) { | ||
| 706 | const std::size_t ceil_size = 1ULL << l2; | ||
| 707 | cp.Create(); | ||
| 708 | cp.MakeStreamCopy(ceil_size); | ||
| 709 | } | 1057 | } |
| 710 | return cp.handle; | 1058 | framebuffer.handle = handle; |
| 711 | } | 1059 | } |
| 712 | 1060 | ||
| 713 | } // namespace OpenGL | 1061 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 72b284fab..04193e31e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -4,157 +4,247 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <functional> | ||
| 9 | #include <memory> | 7 | #include <memory> |
| 10 | #include <unordered_map> | 8 | #include <span> |
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | 9 | ||
| 14 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 15 | 11 | ||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "video_core/engines/shader_bytecode.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | #include "video_core/renderer_opengl/util_shaders.h" | ||
| 20 | #include "video_core/texture_cache/texture_cache.h" | 14 | #include "video_core/texture_cache/texture_cache.h" |
| 21 | 15 | ||
| 22 | namespace OpenGL { | 16 | namespace OpenGL { |
| 23 | 17 | ||
| 24 | using VideoCommon::SurfaceParams; | 18 | class Device; |
| 25 | using VideoCommon::ViewParams; | 19 | class ProgramManager; |
| 26 | |||
| 27 | class CachedSurfaceView; | ||
| 28 | class CachedSurface; | ||
| 29 | class TextureCacheOpenGL; | ||
| 30 | class StateTracker; | 20 | class StateTracker; |
| 31 | 21 | ||
| 32 | using Surface = std::shared_ptr<CachedSurface>; | 22 | class Framebuffer; |
| 33 | using View = std::shared_ptr<CachedSurfaceView>; | 23 | class Image; |
| 34 | using TextureCacheBase = VideoCommon::TextureCache<Surface, View>; | 24 | class ImageView; |
| 25 | class Sampler; | ||
| 35 | 26 | ||
| 36 | class CachedSurface final : public VideoCommon::SurfaceBase<View> { | 27 | using VideoCommon::ImageId; |
| 37 | friend CachedSurfaceView; | 28 | using VideoCommon::ImageViewId; |
| 29 | using VideoCommon::ImageViewType; | ||
| 30 | using VideoCommon::NUM_RT; | ||
| 31 | using VideoCommon::Offset2D; | ||
| 32 | using VideoCommon::RenderTargets; | ||
| 38 | 33 | ||
| 34 | class ImageBufferMap { | ||
| 39 | public: | 35 | public: |
| 40 | explicit CachedSurface(GPUVAddr gpu_addr_, const SurfaceParams& params_, | 36 | explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync); |
| 41 | bool is_astc_supported_); | 37 | ~ImageBufferMap(); |
| 42 | ~CachedSurface(); | ||
| 43 | |||
| 44 | void UploadTexture(const std::vector<u8>& staging_buffer) override; | ||
| 45 | void DownloadTexture(std::vector<u8>& staging_buffer) override; | ||
| 46 | 38 | ||
| 47 | GLenum GetTarget() const { | 39 | GLuint Handle() const noexcept { |
| 48 | return target; | 40 | return handle; |
| 49 | } | 41 | } |
| 50 | 42 | ||
| 51 | GLuint GetTexture() const { | 43 | std::span<u8> Span() const noexcept { |
| 52 | return texture.handle; | 44 | return span; |
| 53 | } | 45 | } |
| 54 | 46 | ||
| 55 | bool IsCompressed() const { | 47 | private: |
| 56 | return is_compressed; | 48 | std::span<u8> span; |
| 49 | OGLSync* sync; | ||
| 50 | GLuint handle; | ||
| 51 | }; | ||
| 52 | |||
| 53 | struct FormatProperties { | ||
| 54 | GLenum compatibility_class; | ||
| 55 | bool compatibility_by_size; | ||
| 56 | bool is_compressed; | ||
| 57 | }; | ||
| 58 | |||
| 59 | class TextureCacheRuntime { | ||
| 60 | friend Framebuffer; | ||
| 61 | friend Image; | ||
| 62 | friend ImageView; | ||
| 63 | friend Sampler; | ||
| 64 | |||
| 65 | public: | ||
| 66 | explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, | ||
| 67 | StateTracker& state_tracker); | ||
| 68 | ~TextureCacheRuntime(); | ||
| 69 | |||
| 70 | void Finish(); | ||
| 71 | |||
| 72 | ImageBufferMap MapUploadBuffer(size_t size); | ||
| 73 | |||
| 74 | ImageBufferMap MapDownloadBuffer(size_t size); | ||
| 75 | |||
| 76 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||
| 77 | |||
| 78 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { | ||
| 79 | UNIMPLEMENTED(); | ||
| 57 | } | 80 | } |
| 58 | 81 | ||
| 59 | protected: | 82 | bool CanImageBeCopied(const Image& dst, const Image& src); |
| 60 | void DecorateSurfaceName() override; | 83 | |
| 84 | void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||
| 85 | |||
| 86 | void BlitFramebuffer(Framebuffer* dst, Framebuffer* src, | ||
| 87 | const std::array<Offset2D, 2>& dst_region, | ||
| 88 | const std::array<Offset2D, 2>& src_region, | ||
| 89 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 90 | Tegra::Engines::Fermi2D::Operation operation); | ||
| 61 | 91 | ||
| 62 | View CreateView(const ViewParams& view_key) override; | 92 | void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, |
| 63 | View CreateViewInner(const ViewParams& view_key, bool is_proxy); | 93 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 94 | |||
| 95 | void InsertUploadMemoryBarrier(); | ||
| 96 | |||
| 97 | FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; | ||
| 64 | 98 | ||
| 65 | private: | 99 | private: |
| 66 | void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer); | 100 | struct StagingBuffers { |
| 101 | explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); | ||
| 102 | ~StagingBuffers(); | ||
| 67 | 103 | ||
| 68 | GLenum internal_format{}; | 104 | ImageBufferMap RequestMap(size_t requested_size, bool insert_fence); |
| 69 | GLenum format{}; | ||
| 70 | GLenum type{}; | ||
| 71 | bool is_compressed{}; | ||
| 72 | GLenum target{}; | ||
| 73 | u32 view_count{}; | ||
| 74 | 105 | ||
| 75 | OGLTexture texture; | 106 | size_t RequestBuffer(size_t requested_size); |
| 76 | OGLBuffer texture_buffer; | 107 | |
| 108 | std::optional<size_t> FindBuffer(size_t requested_size); | ||
| 109 | |||
| 110 | std::vector<OGLSync> syncs; | ||
| 111 | std::vector<OGLBuffer> buffers; | ||
| 112 | std::vector<u8*> maps; | ||
| 113 | std::vector<size_t> sizes; | ||
| 114 | GLenum storage_flags; | ||
| 115 | GLenum map_flags; | ||
| 116 | }; | ||
| 117 | |||
| 118 | const Device& device; | ||
| 119 | StateTracker& state_tracker; | ||
| 120 | UtilShaders util_shaders; | ||
| 121 | |||
| 122 | std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; | ||
| 123 | |||
| 124 | StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; | ||
| 125 | StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT}; | ||
| 126 | |||
| 127 | OGLTexture null_image_1d_array; | ||
| 128 | OGLTexture null_image_cube_array; | ||
| 129 | OGLTexture null_image_3d; | ||
| 130 | OGLTexture null_image_rect; | ||
| 131 | OGLTextureView null_image_view_1d; | ||
| 132 | OGLTextureView null_image_view_2d; | ||
| 133 | OGLTextureView null_image_view_2d_array; | ||
| 134 | OGLTextureView null_image_view_cube; | ||
| 135 | |||
| 136 | std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views; | ||
| 77 | }; | 137 | }; |
| 78 | 138 | ||
| 79 | class CachedSurfaceView final : public VideoCommon::ViewBase { | 139 | class Image : public VideoCommon::ImageBase { |
| 140 | friend ImageView; | ||
| 141 | |||
| 80 | public: | 142 | public: |
| 81 | explicit CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, bool is_proxy_); | 143 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, |
| 82 | ~CachedSurfaceView(); | 144 | VAddr cpu_addr); |
| 83 | 145 | ||
| 84 | /// @brief Attaches this texture view to the currently bound fb_target framebuffer | 146 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 85 | /// @param attachment Attachment to bind textures to | 147 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 86 | /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER) | ||
| 87 | void Attach(GLenum attachment, GLenum fb_target) const; | ||
| 88 | 148 | ||
| 89 | GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, | 149 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 90 | Tegra::Texture::SwizzleSource y_source, | 150 | std::span<const VideoCommon::BufferCopy> copies); |
| 91 | Tegra::Texture::SwizzleSource z_source, | ||
| 92 | Tegra::Texture::SwizzleSource w_source); | ||
| 93 | 151 | ||
| 94 | void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix); | 152 | void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, |
| 153 | std::span<const VideoCommon::BufferImageCopy> copies); | ||
| 95 | 154 | ||
| 96 | void MarkAsModified(u64 tick) { | 155 | GLuint Handle() const noexcept { |
| 97 | surface.MarkAsModified(true, tick); | 156 | return texture.handle; |
| 98 | } | 157 | } |
| 99 | 158 | ||
| 100 | GLuint GetTexture() const { | 159 | private: |
| 101 | if (is_proxy) { | 160 | void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); |
| 102 | return surface.GetTexture(); | 161 | |
| 103 | } | 162 | void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); |
| 104 | return main_view.handle; | 163 | |
| 164 | OGLTexture texture; | ||
| 165 | OGLTextureView store_view; | ||
| 166 | OGLBuffer buffer; | ||
| 167 | GLenum gl_internal_format = GL_NONE; | ||
| 168 | GLenum gl_store_format = GL_NONE; | ||
| 169 | GLenum gl_format = GL_NONE; | ||
| 170 | GLenum gl_type = GL_NONE; | ||
| 171 | }; | ||
| 172 | |||
| 173 | class ImageView : public VideoCommon::ImageViewBase { | ||
| 174 | friend Image; | ||
| 175 | |||
| 176 | public: | ||
| 177 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); | ||
| 178 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); | ||
| 179 | |||
| 180 | [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { | ||
| 181 | return views[static_cast<size_t>(query_type)]; | ||
| 105 | } | 182 | } |
| 106 | 183 | ||
| 107 | GLenum GetFormat() const { | 184 | [[nodiscard]] GLuint DefaultHandle() const noexcept { |
| 108 | return format; | 185 | return default_handle; |
| 109 | } | 186 | } |
| 110 | 187 | ||
| 111 | const SurfaceParams& GetSurfaceParams() const { | 188 | [[nodiscard]] GLenum Format() const noexcept { |
| 112 | return surface.GetSurfaceParams(); | 189 | return internal_format; |
| 113 | } | 190 | } |
| 114 | 191 | ||
| 115 | private: | 192 | private: |
| 116 | OGLTextureView CreateTextureView() const; | 193 | void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, |
| 194 | const VideoCommon::ImageViewInfo& info, | ||
| 195 | VideoCommon::SubresourceRange view_range); | ||
| 196 | |||
| 197 | std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{}; | ||
| 198 | std::vector<OGLTextureView> stored_views; | ||
| 199 | GLuint default_handle = 0; | ||
| 200 | GLenum internal_format = GL_NONE; | ||
| 201 | }; | ||
| 202 | |||
| 203 | class ImageAlloc : public VideoCommon::ImageAllocBase {}; | ||
| 117 | 204 | ||
| 118 | CachedSurface& surface; | 205 | class Sampler { |
| 119 | const GLenum format; | 206 | public: |
| 120 | const GLenum target; | 207 | explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); |
| 121 | const bool is_proxy; | ||
| 122 | 208 | ||
| 123 | std::unordered_map<u32, OGLTextureView> view_cache; | 209 | GLuint Handle() const noexcept { |
| 124 | OGLTextureView main_view; | 210 | return sampler.handle; |
| 211 | } | ||
| 125 | 212 | ||
| 126 | // Use an invalid default so it always fails the comparison test | 213 | private: |
| 127 | u32 current_swizzle = 0xffffffff; | 214 | OGLSampler sampler; |
| 128 | GLuint current_view = 0; | ||
| 129 | }; | 215 | }; |
| 130 | 216 | ||
| 131 | class TextureCacheOpenGL final : public TextureCacheBase { | 217 | class Framebuffer { |
| 132 | public: | 218 | public: |
| 133 | explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_, | 219 | explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers, |
| 134 | Tegra::Engines::Maxwell3D& maxwell3d_, | 220 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key); |
| 135 | Tegra::MemoryManager& gpu_memory_, const Device& device_, | ||
| 136 | StateTracker& state_tracker); | ||
| 137 | ~TextureCacheOpenGL(); | ||
| 138 | |||
| 139 | protected: | ||
| 140 | Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; | ||
| 141 | |||
| 142 | void ImageCopy(Surface& src_surface, Surface& dst_surface, | ||
| 143 | const VideoCommon::CopyParams& copy_params) override; | ||
| 144 | 221 | ||
| 145 | void ImageBlit(View& src_view, View& dst_view, | 222 | [[nodiscard]] GLuint Handle() const noexcept { |
| 146 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 223 | return framebuffer.handle; |
| 224 | } | ||
| 147 | 225 | ||
| 148 | void BufferCopy(Surface& src_surface, Surface& dst_surface) override; | 226 | [[nodiscard]] GLbitfield BufferBits() const noexcept { |
| 227 | return buffer_bits; | ||
| 228 | } | ||
| 149 | 229 | ||
| 150 | private: | 230 | private: |
| 151 | GLuint FetchPBO(std::size_t buffer_size); | 231 | OGLFramebuffer framebuffer; |
| 152 | 232 | GLbitfield buffer_bits = GL_NONE; | |
| 153 | StateTracker& state_tracker; | 233 | }; |
| 154 | 234 | ||
| 155 | OGLFramebuffer src_framebuffer; | 235 | struct TextureCacheParams { |
| 156 | OGLFramebuffer dst_framebuffer; | 236 | static constexpr bool ENABLE_VALIDATION = true; |
| 157 | std::unordered_map<u32, OGLBuffer> copy_pbo_cache; | 237 | static constexpr bool FRAMEBUFFER_BLITS = true; |
| 238 | static constexpr bool HAS_EMULATED_COPIES = true; | ||
| 239 | |||
| 240 | using Runtime = OpenGL::TextureCacheRuntime; | ||
| 241 | using Image = OpenGL::Image; | ||
| 242 | using ImageAlloc = OpenGL::ImageAlloc; | ||
| 243 | using ImageView = OpenGL::ImageView; | ||
| 244 | using Sampler = OpenGL::Sampler; | ||
| 245 | using Framebuffer = OpenGL::Framebuffer; | ||
| 158 | }; | 246 | }; |
| 159 | 247 | ||
| 248 | using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; | ||
| 249 | |||
| 160 | } // namespace OpenGL | 250 | } // namespace OpenGL |
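Editor's note: the rewritten gl_texture_cache.h drops the old TextureCacheOpenGL subclass in favor of a traits struct — TextureCacheParams names the backend types and compile-time capability flags, and the shared VideoCommon::TextureCache<P> template consumes them. A minimal sketch of the same idiom follows; the names are illustrative, not the actual VideoCommon interface:

// Minimal sketch of the traits-based backend plug-in used above.
template <class P>
class GenericTextureCache {
public:
    using Runtime = typename P::Runtime;
    using Image = typename P::Image;

    explicit GenericTextureCache(Runtime& runtime_) : runtime{runtime_} {}

    void Upload(Image& image) {
        // Capability flags are compile-time constants, so the unsupported
        // branch compiles away entirely per backend.
        if constexpr (P::HAS_EMULATED_COPIES) {
            runtime.EmulateCopy(image);
        } else {
            runtime.NativeCopy(image);
        }
    }

private:
    Runtime& runtime;
};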
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index dd4ee3361..cbccfdeb4 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h | |||
| @@ -475,6 +475,19 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) { | |||
| 475 | return GL_FILL; | 475 | return GL_FILL; |
| 476 | } | 476 | } |
| 477 | 477 | ||
| 478 | inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) { | ||
| 479 | switch (filter) { | ||
| 480 | case Tegra::Texture::SamplerReduction::WeightedAverage: | ||
| 481 | return GL_WEIGHTED_AVERAGE_ARB; | ||
| 482 | case Tegra::Texture::SamplerReduction::Min: | ||
| 483 | return GL_MIN; | ||
| 484 | case Tegra::Texture::SamplerReduction::Max: | ||
| 485 | return GL_MAX; | ||
| 486 | } | ||
| 487 | UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter)); | ||
| 488 | return GL_WEIGHTED_AVERAGE_ARB; | ||
| 489 | } | ||
| 490 | |||
| 478 | inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) { | 491 | inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) { |
| 479 | // Enumeration order matches register order. We can convert it arithmetically. | 492 | // Enumeration order matches register order. We can convert it arithmetically. |
| 480 | return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle); | 493 | return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle); |
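Editor's note: ReductionFilter maps Tegra's sampler reduction mode onto GL_ARB_texture_filter_minmax tokens (GL_MIN and GL_MAX are reused core enums). A hedged call-site sketch; the extension guard and the `reduction` field name are illustrative, not taken from this patch:

// Illustrative: apply the reduction mode to a sampler when the driver
// exposes ARB_texture_filter_minmax.
if (GLAD_GL_ARB_texture_filter_minmax) {
    glSamplerParameteri(sampler.handle, GL_TEXTURE_REDUCTION_MODE_ARB,
                        MaxwellToGL::ReductionFilter(tsc.reduction)); // `tsc.reduction` is hypothetical
}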
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index cbfaaa99c..dd77a543c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -23,10 +23,10 @@ | |||
| 23 | #include "core/telemetry_session.h" | 23 | #include "core/telemetry_session.h" |
| 24 | #include "video_core/host_shaders/opengl_present_frag.h" | 24 | #include "video_core/host_shaders/opengl_present_frag.h" |
| 25 | #include "video_core/host_shaders/opengl_present_vert.h" | 25 | #include "video_core/host_shaders/opengl_present_vert.h" |
| 26 | #include "video_core/morton.h" | ||
| 27 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 26 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 28 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 27 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 29 | #include "video_core/renderer_opengl/renderer_opengl.h" | 28 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 29 | #include "video_core/textures/decoders.h" | ||
| 30 | 30 | ||
| 31 | namespace OpenGL { | 31 | namespace OpenGL { |
| 32 | 32 | ||
| @@ -140,11 +140,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 140 | if (!framebuffer) { | 140 | if (!framebuffer) { |
| 141 | return; | 141 | return; |
| 142 | } | 142 | } |
| 143 | |||
| 144 | PrepareRendertarget(framebuffer); | 143 | PrepareRendertarget(framebuffer); |
| 145 | RenderScreenshot(); | 144 | RenderScreenshot(); |
| 146 | 145 | ||
| 147 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); | 146 | state_tracker.BindFramebuffer(0); |
| 148 | DrawScreen(emu_window.GetFramebufferLayout()); | 147 | DrawScreen(emu_window.GetFramebufferLayout()); |
| 149 | 148 | ||
| 150 | ++m_current_frame; | 149 | ++m_current_frame; |
| @@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf | |||
| 187 | // Reset the screen info's display texture to its own permanent texture | 186 | // Reset the screen info's display texture to its own permanent texture |
| 188 | screen_info.display_texture = screen_info.texture.resource.handle; | 187 | screen_info.display_texture = screen_info.texture.resource.handle; |
| 189 | 188 | ||
| 190 | const auto pixel_format{ | ||
| 191 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; | ||
| 192 | const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; | ||
| 193 | const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; | ||
| 194 | u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; | ||
| 195 | rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes); | ||
| 196 | |||
| 197 | // TODO(Rodrigo): Read this from HLE | 189 | // TODO(Rodrigo): Read this from HLE |
| 198 | constexpr u32 block_height_log2 = 4; | 190 | constexpr u32 block_height_log2 = 4; |
| 199 | VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, | 191 | const auto pixel_format{ |
| 200 | framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, | 192 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; |
| 201 | gl_framebuffer_data.data(), host_ptr); | 193 | const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; |
| 202 | 194 | const u64 size_in_bytes{Tegra::Texture::CalculateSize( | |
| 195 | true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; | ||
| 196 | const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; | ||
| 197 | const std::span<const u8> input_data(host_ptr, size_in_bytes); | ||
| 198 | Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, | ||
| 199 | framebuffer.width, framebuffer.height, 1, block_height_log2, | ||
| 200 | 0); | ||
| 201 | |||
| 202 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); | ||
| 203 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); | 203 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); |
| 204 | 204 | ||
| 205 | // Update existing texture | 205 | // Update existing texture |
| @@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 238 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); | 238 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); |
| 239 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); | 239 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); |
| 240 | 240 | ||
| 241 | // Generate presentation sampler | ||
| 242 | present_sampler.Create(); | ||
| 243 | glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | ||
| 244 | |||
| 241 | // Generate VBO handle for drawing | 245 | // Generate VBO handle for drawing |
| 242 | vertex_buffer.Create(); | 246 | vertex_buffer.Create(); |
| 243 | 247 | ||
| @@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 255 | // Clear screen to black | 259 | // Clear screen to black |
| 256 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); | 260 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); |
| 257 | 261 | ||
| 262 | // Enable seamless cubemaps when per-texture parameters are not available | ||
| 263 | if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { | ||
| 264 | glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); | ||
| 265 | } | ||
| 266 | |||
| 258 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it | 267 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it |
| 259 | if (device.HasVertexBufferUnifiedMemory()) { | 268 | if (device.HasVertexBufferUnifiedMemory()) { |
| 260 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); | 269 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); |
| @@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | |||
| 296 | 305 | ||
| 297 | const auto pixel_format{ | 306 | const auto pixel_format{ |
| 298 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; | 307 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; |
| 299 | const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; | 308 | const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; |
| 300 | gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel); | 309 | gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel); |
| 301 | 310 | ||
| 302 | GLint internal_format; | 311 | GLint internal_format; |
| @@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | |||
| 315 | internal_format = GL_RGBA8; | 324 | internal_format = GL_RGBA8; |
| 316 | texture.gl_format = GL_RGBA; | 325 | texture.gl_format = GL_RGBA; |
| 317 | texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; | 326 | texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; |
| 318 | UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", | 327 | // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", |
| 319 | static_cast<u32>(framebuffer.pixel_format)); | 328 | // static_cast<u32>(framebuffer.pixel_format)); |
| 320 | } | 329 | } |
| 321 | 330 | ||
| 322 | texture.resource.Release(); | 331 | texture.resource.Release(); |
| @@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 382 | state_tracker.NotifyPolygonModes(); | 391 | state_tracker.NotifyPolygonModes(); |
| 383 | state_tracker.NotifyViewport0(); | 392 | state_tracker.NotifyViewport0(); |
| 384 | state_tracker.NotifyScissor0(); | 393 | state_tracker.NotifyScissor0(); |
| 385 | state_tracker.NotifyColorMask0(); | 394 | state_tracker.NotifyColorMask(0); |
| 386 | state_tracker.NotifyBlend0(); | 395 | state_tracker.NotifyBlend0(); |
| 387 | state_tracker.NotifyFramebuffer(); | 396 | state_tracker.NotifyFramebuffer(); |
| 388 | state_tracker.NotifyFrontFace(); | 397 | state_tracker.NotifyFrontFace(); |
| @@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 440 | } | 449 | } |
| 441 | 450 | ||
| 442 | glBindTextureUnit(0, screen_info.display_texture); | 451 | glBindTextureUnit(0, screen_info.display_texture); |
| 443 | glBindSampler(0, 0); | 452 | glBindSampler(0, present_sampler.handle); |
| 444 | 453 | ||
| 445 | glClear(GL_COLOR_BUFFER_BIT); | 454 | glClear(GL_COLOR_BUFFER_BIT); |
| 446 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | 455 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); |
| @@ -473,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() { | |||
| 473 | 482 | ||
| 474 | DrawScreen(layout); | 483 | DrawScreen(layout); |
| 475 | 484 | ||
| 485 | glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); | ||
| 486 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); | ||
| 476 | glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, | 487 | glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, |
| 477 | renderer_settings.screenshot_bits); | 488 | renderer_settings.screenshot_bits); |
| 478 | 489 | ||
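Editor's note: the SwapBuffers path now deswizzles the guest framebuffer with Tegra::Texture::UnswizzleTexture, sizing the source via CalculateSize, instead of flushing through the removed MortonSwizzle helper. A back-of-the-envelope for the swizzled size, assuming the usual Tegra GOB geometry (64-byte by 8-row tiles); the real math lives in Tegra::Texture::CalculateSize:

// Hypothetical 1280x720 A8B8G8R8 frame with block_height_log2 = 4.
constexpr unsigned bytes_per_pixel = 4;
constexpr unsigned stride = 1280;   // pixels per row
constexpr unsigned height = 720;
constexpr unsigned block_height_log2 = 4;
constexpr unsigned rows_per_block = 8U << block_height_log2;  // 128 rows per block
constexpr unsigned aligned_height =
    ((height + rows_per_block - 1) / rows_per_block) * rows_per_block;  // 768 rows
constexpr unsigned swizzled_size =
    stride * bytes_per_pixel * aligned_height;  // 3'932'160 bytes vs 3'686'400 linear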
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 376f88766..44e109794 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -102,6 +102,7 @@ private: | |||
| 102 | StateTracker state_tracker{gpu}; | 102 | StateTracker state_tracker{gpu}; |
| 103 | 103 | ||
| 104 | // OpenGL object IDs | 104 | // OpenGL object IDs |
| 105 | OGLSampler present_sampler; | ||
| 105 | OGLBuffer vertex_buffer; | 106 | OGLBuffer vertex_buffer; |
| 106 | OGLProgram vertex_program; | 107 | OGLProgram vertex_program; |
| 107 | OGLProgram fragment_program; | 108 | OGLProgram fragment_program; |
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp new file mode 100644 index 000000000..eb849cbf2 --- /dev/null +++ b/src/video_core/renderer_opengl/util_shaders.cpp | |||
| @@ -0,0 +1,224 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <bit> | ||
| 6 | #include <span> | ||
| 7 | #include <string_view> | ||
| 8 | |||
| 9 | #include <glad/glad.h> | ||
| 10 | |||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "common/div_ceil.h" | ||
| 14 | #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" | ||
| 15 | #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" | ||
| 16 | #include "video_core/host_shaders/opengl_copy_bc4_comp.h" | ||
| 17 | #include "video_core/host_shaders/pitch_unswizzle_comp.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 20 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 21 | #include "video_core/renderer_opengl/util_shaders.h" | ||
| 22 | #include "video_core/surface.h" | ||
| 23 | #include "video_core/texture_cache/accelerated_swizzle.h" | ||
| 24 | #include "video_core/texture_cache/types.h" | ||
| 25 | #include "video_core/texture_cache/util.h" | ||
| 26 | #include "video_core/textures/decoders.h" | ||
| 27 | |||
| 28 | namespace OpenGL { | ||
| 29 | |||
| 30 | using namespace HostShaders; | ||
| 31 | |||
| 32 | using VideoCommon::Extent3D; | ||
| 33 | using VideoCommon::ImageCopy; | ||
| 34 | using VideoCommon::ImageType; | ||
| 35 | using VideoCommon::SwizzleParameters; | ||
| 36 | using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams; | ||
| 37 | using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; | ||
| 38 | using VideoCore::Surface::BytesPerBlock; | ||
| 39 | |||
| 40 | namespace { | ||
| 41 | |||
| 42 | OGLProgram MakeProgram(std::string_view source) { | ||
| 43 | OGLShader shader; | ||
| 44 | shader.Create(source, GL_COMPUTE_SHADER); | ||
| 45 | |||
| 46 | OGLProgram program; | ||
| 47 | program.Create(true, false, shader.handle); | ||
| 48 | return program; | ||
| 49 | } | ||
| 50 | |||
| 51 | } // Anonymous namespace | ||
| 52 | |||
| 53 | UtilShaders::UtilShaders(ProgramManager& program_manager_) | ||
| 54 | : program_manager{program_manager_}, | ||
| 55 | block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), | ||
| 56 | block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), | ||
| 57 | pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), | ||
| 58 | copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { | ||
| 59 | const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); | ||
| 60 | swizzle_table_buffer.Create(); | ||
| 61 | glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); | ||
| 62 | } | ||
| 63 | |||
| 64 | UtilShaders::~UtilShaders() = default; | ||
| 65 | |||
| 66 | void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||
| 67 | std::span<const SwizzleParameters> swizzles) { | ||
| 68 | static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; | ||
| 69 | static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; | ||
| 70 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; | ||
| 71 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||
| 72 | |||
| 73 | program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); | ||
| 74 | glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | ||
| 75 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | ||
| 76 | |||
| 77 | const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); | ||
| 78 | for (const SwizzleParameters& swizzle : swizzles) { | ||
| 79 | const Extent3D num_tiles = swizzle.num_tiles; | ||
| 80 | const size_t input_offset = swizzle.buffer_offset + buffer_offset; | ||
| 81 | |||
| 82 | const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | ||
| 83 | const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | ||
| 84 | |||
| 85 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | ||
| 86 | glUniform3uiv(0, 1, params.origin.data()); | ||
| 87 | glUniform3iv(1, 1, params.destination.data()); | ||
| 88 | glUniform1ui(2, params.bytes_per_block_log2); | ||
| 89 | glUniform1ui(3, params.layer_stride); | ||
| 90 | glUniform1ui(4, params.block_size); | ||
| 91 | glUniform1ui(5, params.x_shift); | ||
| 92 | glUniform1ui(6, params.block_height); | ||
| 93 | glUniform1ui(7, params.block_height_mask); | ||
| 94 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | ||
| 95 | input_offset, image.guest_size_bytes - swizzle.buffer_offset); | ||
| 96 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, | ||
| 97 | GL_WRITE_ONLY, store_format); | ||
| 98 | glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); | ||
| 99 | } | ||
| 100 | program_manager.RestoreGuestCompute(); | ||
| 101 | } | ||
| 102 | |||
| 103 | void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||
| 104 | std::span<const SwizzleParameters> swizzles) { | ||
| 105 | static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; | ||
| 106 | |||
| 107 | static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; | ||
| 108 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; | ||
| 109 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||
| 110 | |||
| 111 | glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | ||
| 112 | program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); | ||
| 113 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | ||
| 114 | |||
| 115 | const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); | ||
| 116 | for (const SwizzleParameters& swizzle : swizzles) { | ||
| 117 | const Extent3D num_tiles = swizzle.num_tiles; | ||
| 118 | const size_t input_offset = swizzle.buffer_offset + buffer_offset; | ||
| 119 | |||
| 120 | const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | ||
| 121 | const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | ||
| 122 | const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth); | ||
| 123 | |||
| 124 | const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info); | ||
| 125 | glUniform3uiv(0, 1, params.origin.data()); | ||
| 126 | glUniform3iv(1, 1, params.destination.data()); | ||
| 127 | glUniform1ui(2, params.bytes_per_block_log2); | ||
| 128 | glUniform1ui(3, params.slice_size); | ||
| 129 | glUniform1ui(4, params.block_size); | ||
| 130 | glUniform1ui(5, params.x_shift); | ||
| 131 | glUniform1ui(6, params.block_height); | ||
| 132 | glUniform1ui(7, params.block_height_mask); | ||
| 133 | glUniform1ui(8, params.block_depth); | ||
| 134 | glUniform1ui(9, params.block_depth_mask); | ||
| 135 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | ||
| 136 | input_offset, image.guest_size_bytes - swizzle.buffer_offset); | ||
| 137 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, | ||
| 138 | GL_WRITE_ONLY, store_format); | ||
| 139 | glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); | ||
| 140 | } | ||
| 141 | program_manager.RestoreGuestCompute(); | ||
| 142 | } | ||
| 143 | |||
| 144 | void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||
| 145 | std::span<const SwizzleParameters> swizzles) { | ||
| 146 | static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; | ||
| 147 | static constexpr GLuint BINDING_INPUT_BUFFER = 0; | ||
| 148 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||
| 149 | static constexpr GLuint LOC_ORIGIN = 0; | ||
| 150 | static constexpr GLuint LOC_DESTINATION = 1; | ||
| 151 | static constexpr GLuint LOC_BYTES_PER_BLOCK = 2; | ||
| 152 | static constexpr GLuint LOC_PITCH = 3; | ||
| 153 | |||
| 154 | const u32 bytes_per_block = BytesPerBlock(image.info.format); | ||
| 155 | const GLenum format = StoreFormat(bytes_per_block); | ||
| 156 | const u32 pitch = image.info.pitch; | ||
| 157 | |||
| 158 | UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), | ||
| 159 | "Non-power of two images are not implemented"); | ||
| 160 | |||
| 161 | program_manager.BindHostCompute(pitch_unswizzle_program.handle); | ||
| 162 | glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | ||
| 163 | glUniform2ui(LOC_ORIGIN, 0, 0); | ||
| 164 | glUniform2i(LOC_DESTINATION, 0, 0); | ||
| 165 | glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); | ||
| 166 | glUniform1ui(LOC_PITCH, pitch); | ||
| 167 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); | ||
| 168 | for (const SwizzleParameters& swizzle : swizzles) { | ||
| 169 | const Extent3D num_tiles = swizzle.num_tiles; | ||
| 170 | const size_t input_offset = swizzle.buffer_offset + buffer_offset; | ||
| 171 | |||
| 172 | const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | ||
| 173 | const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | ||
| 174 | |||
| 175 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | ||
| 176 | input_offset, image.guest_size_bytes - swizzle.buffer_offset); | ||
| 177 | glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); | ||
| 178 | } | ||
| 179 | program_manager.RestoreGuestCompute(); | ||
| 180 | } | ||
| 181 | |||
| 182 | void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) { | ||
| 183 | static constexpr GLuint BINDING_INPUT_IMAGE = 0; | ||
| 184 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 1; | ||
| 185 | static constexpr GLuint LOC_SRC_OFFSET = 0; | ||
| 186 | static constexpr GLuint LOC_DST_OFFSET = 1; | ||
| 187 | |||
| 188 | program_manager.BindHostCompute(copy_bc4_program.handle); | ||
| 189 | |||
| 190 | for (const ImageCopy& copy : copies) { | ||
| 191 | ASSERT(copy.src_subresource.base_layer == 0); | ||
| 192 | ASSERT(copy.src_subresource.num_layers == 1); | ||
| 193 | ASSERT(copy.dst_subresource.base_layer == 0); | ||
| 194 | ASSERT(copy.dst_subresource.num_layers == 1); | ||
| 195 | |||
| 196 | glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); | ||
| 197 | glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); | ||
| 198 | glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level, | ||
| 199 | GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); | ||
| 200 | glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(), | ||
| 201 | copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); | ||
| 202 | glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); | ||
| 203 | } | ||
| 204 | program_manager.RestoreGuestCompute(); | ||
| 205 | } | ||
| 206 | |||
| 207 | GLenum StoreFormat(u32 bytes_per_block) { | ||
| 208 | switch (bytes_per_block) { | ||
| 209 | case 1: | ||
| 210 | return GL_R8UI; | ||
| 211 | case 2: | ||
| 212 | return GL_R16UI; | ||
| 213 | case 4: | ||
| 214 | return GL_R32UI; | ||
| 215 | case 8: | ||
| 216 | return GL_RG32UI; | ||
| 217 | case 16: | ||
| 218 | return GL_RGBA32UI; | ||
| 219 | } | ||
| 220 | UNREACHABLE(); | ||
| 221 | return GL_R8UI; | ||
| 222 | } | ||
| 223 | |||
| 224 | } // namespace OpenGL | ||
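Editor's note: each dispatch above rounds the tile grid up to whole workgroups with Common::DivCeil so partial tiles still get covered. A minimal stand-in, assuming the obvious semantics of common/div_ceil.h:

#include <cstdint>

// Round an integer division up: partial tiles still get a workgroup.
constexpr std::uint32_t DivCeil(std::uint32_t value, std::uint32_t divisor) {
    return (value + divisor - 1) / divisor;
}

// Example: a 100x60 tile grid with the 32x32 workgroup above -> 4x2 dispatches.
static_assert(DivCeil(100, 32) == 4 && DivCeil(60, 32) == 2);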
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h new file mode 100644 index 000000000..359997255 --- /dev/null +++ b/src/video_core/renderer_opengl/util_shaders.h | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <span> | ||
| 8 | |||
| 9 | #include <glad/glad.h> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 13 | #include "video_core/texture_cache/types.h" | ||
| 14 | |||
| 15 | namespace OpenGL { | ||
| 16 | |||
| 17 | class Image; | ||
| 18 | class ImageBufferMap; | ||
| 19 | class ProgramManager; | ||
| 20 | |||
| 21 | class UtilShaders { | ||
| 22 | public: | ||
| 23 | explicit UtilShaders(ProgramManager& program_manager); | ||
| 24 | ~UtilShaders(); | ||
| 25 | |||
| 26 | void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||
| 27 | std::span<const VideoCommon::SwizzleParameters> swizzles); | ||
| 28 | |||
| 29 | void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||
| 30 | std::span<const VideoCommon::SwizzleParameters> swizzles); | ||
| 31 | |||
| 32 | void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||
| 33 | std::span<const VideoCommon::SwizzleParameters> swizzles); | ||
| 34 | |||
| 35 | void CopyBC4(Image& dst_image, Image& src_image, | ||
| 36 | std::span<const VideoCommon::ImageCopy> copies); | ||
| 37 | |||
| 38 | private: | ||
| 39 | ProgramManager& program_manager; | ||
| 40 | |||
| 41 | OGLBuffer swizzle_table_buffer; | ||
| 42 | |||
| 43 | OGLProgram block_linear_unswizzle_2d_program; | ||
| 44 | OGLProgram block_linear_unswizzle_3d_program; | ||
| 45 | OGLProgram pitch_unswizzle_program; | ||
| 46 | OGLProgram copy_bc4_program; | ||
| 47 | }; | ||
| 48 | |||
| 49 | GLenum StoreFormat(u32 bytes_per_block); | ||
| 50 | |||
| 51 | } // namespace OpenGL | ||
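Editor's note: a hedged sketch of how the texture cache runtime would drive these entry points for a block-linear upload; the wrapper function and its arguments are illustrative, assuming the declarations above are visible:

#include <cstddef>
#include <span>

// Illustrative call site; `image`, `map` and `swizzles` come from the texture
// cache upload path, which this header does not show.
void UploadBlockLinear(OpenGL::UtilShaders& util_shaders, OpenGL::Image& image,
                       const OpenGL::ImageBufferMap& map, size_t buffer_offset,
                       std::span<const VideoCommon::SwizzleParameters> swizzles,
                       bool is_3d) {
    if (is_3d) {
        util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles);
    } else {
        util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles);
    }
}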
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp deleted file mode 100644 index 6d7bb16b2..000000000 --- a/src/video_core/renderer_opengl/utils.cpp +++ /dev/null | |||
| @@ -1,42 +0,0 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include <fmt/format.h> | ||
| 9 | #include <glad/glad.h> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||
| 13 | #include "video_core/renderer_opengl/utils.h" | ||
| 14 | |||
| 15 | namespace OpenGL { | ||
| 16 | |||
| 17 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { | ||
| 18 | if (!GLAD_GL_KHR_debug) { | ||
| 19 | // We don't need to throw an error as this is just for debugging | ||
| 20 | return; | ||
| 21 | } | ||
| 22 | |||
| 23 | std::string object_label; | ||
| 24 | if (extra_info.empty()) { | ||
| 25 | switch (identifier) { | ||
| 26 | case GL_TEXTURE: | ||
| 27 | object_label = fmt::format("Texture@0x{:016X}", addr); | ||
| 28 | break; | ||
| 29 | case GL_PROGRAM: | ||
| 30 | object_label = fmt::format("Shader@0x{:016X}", addr); | ||
| 31 | break; | ||
| 32 | default: | ||
| 33 | object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr); | ||
| 34 | break; | ||
| 35 | } | ||
| 36 | } else { | ||
| 37 | object_label = fmt::format("{}@0x{:016X}", extra_info, addr); | ||
| 38 | } | ||
| 39 | glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str())); | ||
| 40 | } | ||
| 41 | |||
| 42 | } // namespace OpenGL | ||
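Editor's note: with the LabelGLObject helper deleted, debug labels can be applied directly through KHR_debug. A hedged equivalent for the texture case, assuming `addr` and `handle` are in scope at the call site; the guard and format string mirror the removed code:

if (GLAD_GL_KHR_debug) {
    const std::string label = fmt::format("Texture@0x{:016X}", addr);
    glObjectLabel(GL_TEXTURE, handle, -1, label.c_str());
}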
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h deleted file mode 100644 index 9c09ee12c..000000000 --- a/src/video_core/renderer_opengl/utils.h +++ /dev/null | |||
| @@ -1,16 +0,0 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string_view> | ||
| 8 | #include <vector> | ||
| 9 | #include <glad/glad.h> | ||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace OpenGL { | ||
| 13 | |||
| 14 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); | ||
| 15 | |||
| 16 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp new file mode 100644 index 000000000..87c8e5693 --- /dev/null +++ b/src/video_core/renderer_vulkan/blit_image.cpp | |||
| @@ -0,0 +1,624 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | |||
| 7 | #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" | ||
| 8 | #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" | ||
| 9 | #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" | ||
| 10 | #include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" | ||
| 11 | #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" | ||
| 12 | #include "video_core/renderer_vulkan/blit_image.h" | ||
| 13 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_shader_util.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 19 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 20 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 21 | #include "video_core/surface.h" | ||
| 22 | |||
| 23 | namespace Vulkan { | ||
| 24 | |||
| 25 | using VideoCommon::ImageViewType; | ||
| 26 | |||
| 27 | namespace { | ||
| 28 | struct PushConstants { | ||
| 29 | std::array<float, 2> tex_scale; | ||
| 30 | std::array<float, 2> tex_offset; | ||
| 31 | }; | ||
| 32 | |||
| 33 | template <u32 binding> | ||
| 34 | inline constexpr VkDescriptorSetLayoutBinding TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING{ | ||
| 35 | .binding = binding, | ||
| 36 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 37 | .descriptorCount = 1, | ||
| 38 | .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, | ||
| 39 | .pImmutableSamplers = nullptr, | ||
| 40 | }; | ||
| 41 | constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{ | ||
| 42 | TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, | ||
| 43 | TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<1>, | ||
| 44 | }; | ||
| 45 | constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ | ||
| 46 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 47 | .pNext = nullptr, | ||
| 48 | .flags = 0, | ||
| 49 | .bindingCount = 1, | ||
| 50 | .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, | ||
| 51 | }; | ||
| 52 | constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ | ||
| 53 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 54 | .pNext = nullptr, | ||
| 55 | .flags = 0, | ||
| 56 | .bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()), | ||
| 57 | .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(), | ||
| 58 | }; | ||
| 59 | constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{ | ||
| 60 | .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, | ||
| 61 | .offset = 0, | ||
| 62 | .size = sizeof(PushConstants), | ||
| 63 | }; | ||
| 64 | constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{ | ||
| 65 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 66 | .pNext = nullptr, | ||
| 67 | .flags = 0, | ||
| 68 | .vertexBindingDescriptionCount = 0, | ||
| 69 | .pVertexBindingDescriptions = nullptr, | ||
| 70 | .vertexAttributeDescriptionCount = 0, | ||
| 71 | .pVertexAttributeDescriptions = nullptr, | ||
| 72 | }; | ||
| 73 | constexpr VkPipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{ | ||
| 74 | .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 75 | .pNext = nullptr, | ||
| 76 | .flags = 0, | ||
| 77 | .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, | ||
| 78 | .primitiveRestartEnable = VK_FALSE, | ||
| 79 | }; | ||
| 80 | constexpr VkPipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{ | ||
| 81 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 82 | .pNext = nullptr, | ||
| 83 | .flags = 0, | ||
| 84 | .viewportCount = 1, | ||
| 85 | .pViewports = nullptr, | ||
| 86 | .scissorCount = 1, | ||
| 87 | .pScissors = nullptr, | ||
| 88 | }; | ||
| 89 | constexpr VkPipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{ | ||
| 90 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 91 | .pNext = nullptr, | ||
| 92 | .flags = 0, | ||
| 93 | .depthClampEnable = VK_FALSE, | ||
| 94 | .rasterizerDiscardEnable = VK_FALSE, | ||
| 95 | .polygonMode = VK_POLYGON_MODE_FILL, | ||
| 96 | .cullMode = VK_CULL_MODE_BACK_BIT, | ||
| 97 | .frontFace = VK_FRONT_FACE_CLOCKWISE, | ||
| 98 | .depthBiasEnable = VK_FALSE, | ||
| 99 | .depthBiasConstantFactor = 0.0f, | ||
| 100 | .depthBiasClamp = 0.0f, | ||
| 101 | .depthBiasSlopeFactor = 0.0f, | ||
| 102 | .lineWidth = 1.0f, | ||
| 103 | }; | ||
| 104 | constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{ | ||
| 105 | .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 106 | .pNext = nullptr, | ||
| 107 | .flags = 0, | ||
| 108 | .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, | ||
| 109 | .sampleShadingEnable = VK_FALSE, | ||
| 110 | .minSampleShading = 0.0f, | ||
| 111 | .pSampleMask = nullptr, | ||
| 112 | .alphaToCoverageEnable = VK_FALSE, | ||
| 113 | .alphaToOneEnable = VK_FALSE, | ||
| 114 | }; | ||
| 115 | constexpr std::array DYNAMIC_STATES{ | ||
| 116 | VK_DYNAMIC_STATE_VIEWPORT, | ||
| 117 | VK_DYNAMIC_STATE_SCISSOR, | ||
| 118 | }; | ||
| 119 | constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{ | ||
| 120 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 121 | .pNext = nullptr, | ||
| 122 | .flags = 0, | ||
| 123 | .dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()), | ||
| 124 | .pDynamicStates = DYNAMIC_STATES.data(), | ||
| 125 | }; | ||
| 126 | constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{ | ||
| 127 | .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | ||
| 128 | .pNext = nullptr, | ||
| 129 | .flags = 0, | ||
| 130 | .logicOpEnable = VK_FALSE, | ||
| 131 | .logicOp = VK_LOGIC_OP_CLEAR, | ||
| 132 | .attachmentCount = 0, | ||
| 133 | .pAttachments = nullptr, | ||
| 134 | .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, | ||
| 135 | }; | ||
| 136 | constexpr VkPipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{ | ||
| 137 | .blendEnable = VK_FALSE, | ||
| 138 | .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 139 | .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 140 | .colorBlendOp = VK_BLEND_OP_ADD, | ||
| 141 | .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 142 | .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 143 | .alphaBlendOp = VK_BLEND_OP_ADD, | ||
| 144 | .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | | ||
| 145 | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, | ||
| 146 | }; | ||
| 147 | constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{ | ||
| 148 | .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | ||
| 149 | .pNext = nullptr, | ||
| 150 | .flags = 0, | ||
| 151 | .logicOpEnable = VK_FALSE, | ||
| 152 | .logicOp = VK_LOGIC_OP_CLEAR, | ||
| 153 | .attachmentCount = 1, | ||
| 154 | .pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE, | ||
| 155 | .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, | ||
| 156 | }; | ||
| 157 | constexpr VkPipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{ | ||
| 158 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, | ||
| 159 | .pNext = nullptr, | ||
| 160 | .flags = 0, | ||
| 161 | .depthTestEnable = VK_TRUE, | ||
| 162 | .depthWriteEnable = VK_TRUE, | ||
| 163 | .depthCompareOp = VK_COMPARE_OP_ALWAYS, | ||
| 164 | .depthBoundsTestEnable = VK_FALSE, | ||
| 165 | .stencilTestEnable = VK_FALSE, | ||
| 166 | .front = VkStencilOpState{}, | ||
| 167 | .back = VkStencilOpState{}, | ||
| 168 | .minDepthBounds = 0.0f, | ||
| 169 | .maxDepthBounds = 0.0f, | ||
| 170 | }; | ||
| 171 | |||
| 172 | template <VkFilter filter> | ||
| 173 | inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{ | ||
| 174 | .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, | ||
| 175 | .pNext = nullptr, | ||
| 176 | .flags = 0, | ||
| 177 | .magFilter = filter, | ||
| 178 | .minFilter = filter, | ||
| 179 | .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, | ||
| 180 | .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, | ||
| 181 | .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, | ||
| 182 | .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, | ||
| 183 | .mipLodBias = 0.0f, | ||
| 184 | .anisotropyEnable = VK_FALSE, | ||
| 185 | .maxAnisotropy = 0.0f, | ||
| 186 | .compareEnable = VK_FALSE, | ||
| 187 | .compareOp = VK_COMPARE_OP_NEVER, | ||
| 188 | .minLod = 0.0f, | ||
| 189 | .maxLod = 0.0f, | ||
| 190 | .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE, | ||
| 191 | .unnormalizedCoordinates = VK_TRUE, | ||
| 192 | }; | ||
| 193 | |||
| 194 | constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo( | ||
| 195 | const VkDescriptorSetLayout* set_layout) { | ||
| 196 | return VkPipelineLayoutCreateInfo{ | ||
| 197 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||
| 198 | .pNext = nullptr, | ||
| 199 | .flags = 0, | ||
| 200 | .setLayoutCount = 1, | ||
| 201 | .pSetLayouts = set_layout, | ||
| 202 | .pushConstantRangeCount = 1, | ||
| 203 | .pPushConstantRanges = &PUSH_CONSTANT_RANGE, | ||
| 204 | }; | ||
| 205 | } | ||
| 206 | |||
| 207 | constexpr VkPipelineShaderStageCreateInfo PipelineShaderStageCreateInfo(VkShaderStageFlagBits stage, | ||
| 208 | VkShaderModule shader) { | ||
| 209 | return VkPipelineShaderStageCreateInfo{ | ||
| 210 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 211 | .pNext = nullptr, | ||
| 212 | .flags = 0, | ||
| 213 | .stage = stage, | ||
| 214 | .module = shader, | ||
| 215 | .pName = "main", | ||
| 216 | .pSpecializationInfo = nullptr, | ||
| 217 | }; | ||
| 218 | } | ||
| 219 | |||
| 220 | constexpr std::array<VkPipelineShaderStageCreateInfo, 2> MakeStages( | ||
| 221 | VkShaderModule vertex_shader, VkShaderModule fragment_shader) { | ||
| 222 | return std::array{ | ||
| 223 | PipelineShaderStageCreateInfo(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader), | ||
| 224 | PipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader), | ||
| 225 | }; | ||
| 226 | } | ||
| 227 | |||
| 228 | void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set, | ||
| 229 | VkSampler sampler, VkImageView image_view) { | ||
| 230 | const VkDescriptorImageInfo image_info{ | ||
| 231 | .sampler = sampler, | ||
| 232 | .imageView = image_view, | ||
| 233 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 234 | }; | ||
| 235 | const VkWriteDescriptorSet write_descriptor_set{ | ||
| 236 | .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||
| 237 | .pNext = nullptr, | ||
| 238 | .dstSet = descriptor_set, | ||
| 239 | .dstBinding = 0, | ||
| 240 | .dstArrayElement = 0, | ||
| 241 | .descriptorCount = 1, | ||
| 242 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 243 | .pImageInfo = &image_info, | ||
| 244 | .pBufferInfo = nullptr, | ||
| 245 | .pTexelBufferView = nullptr, | ||
| 246 | }; | ||
| 247 | device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr); | ||
| 248 | } | ||
| 249 | |||
| 250 | void UpdateTwoTexturesDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set, | ||
| 251 | VkSampler sampler, VkImageView image_view_0, | ||
| 252 | VkImageView image_view_1) { | ||
| 253 | const VkDescriptorImageInfo image_info_0{ | ||
| 254 | .sampler = sampler, | ||
| 255 | .imageView = image_view_0, | ||
| 256 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 257 | }; | ||
| 258 | const VkDescriptorImageInfo image_info_1{ | ||
| 259 | .sampler = sampler, | ||
| 260 | .imageView = image_view_1, | ||
| 261 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 262 | }; | ||
| 263 | const std::array write_descriptor_sets{ | ||
| 264 | VkWriteDescriptorSet{ | ||
| 265 | .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||
| 266 | .pNext = nullptr, | ||
| 267 | .dstSet = descriptor_set, | ||
| 268 | .dstBinding = 0, | ||
| 269 | .dstArrayElement = 0, | ||
| 270 | .descriptorCount = 1, | ||
| 271 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 272 | .pImageInfo = &image_info_0, | ||
| 273 | .pBufferInfo = nullptr, | ||
| 274 | .pTexelBufferView = nullptr, | ||
| 275 | }, | ||
| 276 | VkWriteDescriptorSet{ | ||
| 277 | .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||
| 278 | .pNext = nullptr, | ||
| 279 | .dstSet = descriptor_set, | ||
| 280 | .dstBinding = 1, | ||
| 281 | .dstArrayElement = 0, | ||
| 282 | .descriptorCount = 1, | ||
| 283 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 284 | .pImageInfo = &image_info_1, | ||
| 285 | .pBufferInfo = nullptr, | ||
| 286 | .pTexelBufferView = nullptr, | ||
| 287 | }, | ||
| 288 | }; | ||
| 289 | device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr); | ||
| 290 | } | ||
| 291 | |||
| 292 | void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, | ||
| 293 | const std::array<Offset2D, 2>& dst_region, | ||
| 294 | const std::array<Offset2D, 2>& src_region) { | ||
| 295 | const VkOffset2D offset{ | ||
| 296 | .x = std::min(dst_region[0].x, dst_region[1].x), | ||
| 297 | .y = std::min(dst_region[0].y, dst_region[1].y), | ||
| 298 | }; | ||
| 299 | const VkExtent2D extent{ | ||
| 300 | .width = static_cast<u32>(std::abs(dst_region[1].x - dst_region[0].x)), | ||
| 301 | .height = static_cast<u32>(std::abs(dst_region[1].y - dst_region[0].y)), | ||
| 302 | }; | ||
| 303 | const VkViewport viewport{ | ||
| 304 | .x = static_cast<float>(offset.x), | ||
| 305 | .y = static_cast<float>(offset.y), | ||
| 306 | .width = static_cast<float>(extent.width), | ||
| 307 | .height = static_cast<float>(extent.height), | ||
| 308 | .minDepth = 0.0f, | ||
| 309 | .maxDepth = 1.0f, | ||
| 310 | }; | ||
| 311 | // TODO: Support scissored blits | ||
| 312 | const VkRect2D scissor{ | ||
| 313 | .offset = offset, | ||
| 314 | .extent = extent, | ||
| 315 | }; | ||
| 316 | const float scale_x = static_cast<float>(src_region[1].x - src_region[0].x); | ||
| 317 | const float scale_y = static_cast<float>(src_region[1].y - src_region[0].y); | ||
| 318 | const PushConstants push_constants{ | ||
| 319 | .tex_scale = {scale_x, scale_y}, | ||
| 320 | .tex_offset = {static_cast<float>(src_region[0].x), static_cast<float>(src_region[0].y)}, | ||
| 321 | }; | ||
| 322 | cmdbuf.SetViewport(0, viewport); | ||
| 323 | cmdbuf.SetScissor(0, scissor); | ||
| 324 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); | ||
| 325 | } | ||
| 326 | |||
| 327 | } // Anonymous namespace | ||
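Editor's note: the blit and convert paths below draw three vertices with no vertex buffer bound (PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO declares no bindings); FULL_SCREEN_TRIANGLE_VERT_SPV derives positions from the vertex index. For reference, the NDC positions such a shader typically emits, written out in C++:

// One oversized triangle covers the whole viewport after clipping, which is
// why cmdbuf.Draw(3, 1, 0, 0) suffices below.
constexpr float full_screen_triangle[3][2] = {
    {-1.0f, -1.0f}, // index 0
    {-1.0f,  3.0f}, // index 1
    { 3.0f, -1.0f}, // index 2
};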
| 328 | |||
| 329 | BlitImageHelper::BlitImageHelper(const VKDevice& device_, VKScheduler& scheduler_, | ||
| 330 | StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) | ||
| 331 | : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, | ||
| 332 | one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( | ||
| 333 | ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), | ||
| 334 | two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( | ||
| 335 | TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), | ||
| 336 | one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), | ||
| 337 | two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), | ||
| 338 | one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( | ||
| 339 | PipelineLayoutCreateInfo(one_texture_set_layout.address()))), | ||
| 340 | two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( | ||
| 341 | PipelineLayoutCreateInfo(two_textures_set_layout.address()))), | ||
| 342 | full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), | ||
| 343 | blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), | ||
| 344 | convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), | ||
| 345 | convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), | ||
| 346 | linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)), | ||
| 347 | nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) { | ||
| 348 | if (device.IsExtShaderStencilExportSupported()) { | ||
| 349 | blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV); | ||
| 350 | } | ||
| 351 | } | ||
| 352 | |||
| 353 | BlitImageHelper::~BlitImageHelper() = default; | ||
| 354 | |||
| 355 | void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, | ||
| 356 | const std::array<Offset2D, 2>& dst_region, | ||
| 357 | const std::array<Offset2D, 2>& src_region, | ||
| 358 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 359 | Tegra::Engines::Fermi2D::Operation operation) { | ||
| 360 | const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear; | ||
| 361 | const BlitImagePipelineKey key{ | ||
| 362 | .renderpass = dst_framebuffer->RenderPass(), | ||
| 363 | .operation = operation, | ||
| 364 | }; | ||
| 365 | const VkPipelineLayout layout = *one_texture_pipeline_layout; | ||
| 366 | const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); | ||
| 367 | const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; | ||
| 368 | const VkPipeline pipeline = FindOrEmplacePipeline(key); | ||
| 369 | const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); | ||
| 370 | scheduler.RequestRenderpass(dst_framebuffer); | ||
| 371 | scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, | ||
| 372 | &device = device](vk::CommandBuffer cmdbuf) { | ||
| 373 | // TODO: Barriers | ||
| 374 | UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); | ||
| 375 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | ||
| 376 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, | ||
| 377 | nullptr); | ||
| 378 | BindBlitState(cmdbuf, layout, dst_region, src_region); | ||
| 379 | cmdbuf.Draw(3, 1, 0, 0); | ||
| 380 | }); | ||
| 381 | scheduler.InvalidateState(); | ||
| 382 | } | ||
| 383 | |||
| 384 | void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, | ||
| 385 | VkImageView src_depth_view, VkImageView src_stencil_view, | ||
| 386 | const std::array<Offset2D, 2>& dst_region, | ||
| 387 | const std::array<Offset2D, 2>& src_region, | ||
| 388 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 389 | Tegra::Engines::Fermi2D::Operation operation) { | ||
| 390 | ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); | ||
| 391 | ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy); | ||
| 392 | |||
| 393 | const VkPipelineLayout layout = *two_textures_pipeline_layout; | ||
| 394 | const VkSampler sampler = *nearest_sampler; | ||
| 395 | const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); | ||
| 396 | const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); | ||
| 397 | scheduler.RequestRenderpass(dst_framebuffer); | ||
| 398 | scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, | ||
| 399 | src_stencil_view, descriptor_set, | ||
| 400 | &device = device](vk::CommandBuffer cmdbuf) { | ||
| 401 | // TODO: Barriers | ||
| 402 | UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, | ||
| 403 | src_stencil_view); | ||
| 404 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | ||
| 405 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, | ||
| 406 | nullptr); | ||
| 407 | BindBlitState(cmdbuf, layout, dst_region, src_region); | ||
| 408 | cmdbuf.Draw(3, 1, 0, 0); | ||
| 409 | }); | ||
| 410 | scheduler.InvalidateState(); | ||
| 411 | } | ||
| 412 | |||
| 413 | void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, | ||
| 414 | const ImageView& src_image_view) { | ||
| 415 | ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass()); | ||
| 416 | Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view); | ||
| 417 | } | ||
| 418 | |||
| 419 | void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, | ||
| 420 | const ImageView& src_image_view) { | ||
| 421 | |||
| 422 | ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); | ||
| 423 | Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); | ||
| 424 | } | ||
| 425 | |||
| 426 | void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer, | ||
| 427 | const ImageView& src_image_view) { | ||
| 428 | ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass()); | ||
| 429 | Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view); | ||
| 430 | } | ||
| 431 | |||
| 432 | void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, | ||
| 433 | const ImageView& src_image_view) { | ||
| 434 | ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass()); | ||
| 435 | Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view); | ||
| 436 | } | ||
| 437 | |||
| 438 | void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, | ||
| 439 | const ImageView& src_image_view) { | ||
| 440 | const VkPipelineLayout layout = *one_texture_pipeline_layout; | ||
| 441 | const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); | ||
| 442 | const VkSampler sampler = *nearest_sampler; | ||
| 443 | const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); | ||
| 444 | const VkExtent2D extent{ | ||
| 445 | .width = src_image_view.size.width, | ||
| 446 | .height = src_image_view.size.height, | ||
| 447 | }; | ||
| 448 | scheduler.RequestRenderpass(dst_framebuffer); | ||
| 449 | scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, | ||
| 450 | &device = device](vk::CommandBuffer cmdbuf) { | ||
| 451 | const VkOffset2D offset{ | ||
| 452 | .x = 0, | ||
| 453 | .y = 0, | ||
| 454 | }; | ||
| 455 | const VkViewport viewport{ | ||
| 456 | .x = 0.0f, | ||
| 457 | .y = 0.0f, | ||
| 458 | .width = static_cast<float>(extent.width), | ||
| 459 | .height = static_cast<float>(extent.height), | ||
| 460 | .minDepth = 0.0f, | ||
| 461 | .maxDepth = 0.0f, | ||
| 462 | }; | ||
| 463 | const VkRect2D scissor{ | ||
| 464 | .offset = offset, | ||
| 465 | .extent = extent, | ||
| 466 | }; | ||
| 467 | const PushConstants push_constants{ | ||
| 468 | .tex_scale = {viewport.width, viewport.height}, | ||
| 469 | .tex_offset = {0.0f, 0.0f}, | ||
| 470 | }; | ||
| 471 | UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); | ||
| 472 | |||
| 473 | // TODO: Barriers | ||
| 474 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | ||
| 475 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, | ||
| 476 | nullptr); | ||
| 477 | cmdbuf.SetViewport(0, viewport); | ||
| 478 | cmdbuf.SetScissor(0, scissor); | ||
| 479 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); | ||
| 480 | cmdbuf.Draw(3, 1, 0, 0); | ||
| 481 | }); | ||
| 482 | scheduler.InvalidateState(); | ||
| 483 | } | ||
| 484 | |||
| 485 | VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) { | ||
| 486 | const auto it = std::ranges::find(blit_color_keys, key); | ||
| 487 | if (it != blit_color_keys.end()) { | ||
| 488 | return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)]; | ||
| 489 | } | ||
| 490 | blit_color_keys.push_back(key); | ||
| 491 | |||
| 492 | const std::array stages = MakeStages(*full_screen_vert, *blit_color_to_color_frag); | ||
| 493 | const VkPipelineColorBlendAttachmentState blend_attachment{ | ||
| 494 | .blendEnable = VK_FALSE, | ||
| 495 | .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 496 | .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 497 | .colorBlendOp = VK_BLEND_OP_ADD, | ||
| 498 | .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 499 | .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 500 | .alphaBlendOp = VK_BLEND_OP_ADD, | ||
| 501 | .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | | ||
| 502 | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, | ||
| 503 | }; | ||
| 504 | // TODO: Programmable blending | ||

| 505 | const VkPipelineColorBlendStateCreateInfo color_blend_create_info{ | ||
| 506 | .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | ||
| 507 | .pNext = nullptr, | ||
| 508 | .flags = 0, | ||
| 509 | .logicOpEnable = VK_FALSE, | ||
| 510 | .logicOp = VK_LOGIC_OP_CLEAR, | ||
| 511 | .attachmentCount = 1, | ||
| 512 | .pAttachments = &blend_attachment, | ||
| 513 | .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, | ||
| 514 | }; | ||
| 515 | blit_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ | ||
| 516 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 517 | .pNext = nullptr, | ||
| 518 | .flags = 0, | ||
| 519 | .stageCount = static_cast<u32>(stages.size()), | ||
| 520 | .pStages = stages.data(), | ||
| 521 | .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 522 | .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 523 | .pTessellationState = nullptr, | ||
| 524 | .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 525 | .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 526 | .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 527 | .pDepthStencilState = nullptr, | ||
| 528 | .pColorBlendState = &color_blend_create_info, | ||
| 529 | .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 530 | .layout = *one_texture_pipeline_layout, | ||
| 531 | .renderPass = key.renderpass, | ||
| 532 | .subpass = 0, | ||
| 533 | .basePipelineHandle = VK_NULL_HANDLE, | ||
| 534 | .basePipelineIndex = 0, | ||
| 535 | })); | ||
| 536 | return *blit_color_pipelines.back(); | ||
| 537 | } | ||
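Editor's note: FindOrEmplacePipeline caches pipelines with a linear scan over a key vector kept parallel to the pipeline vector; for the handful of renderpass/operation combinations a blit sees, this is simpler and no slower than a hash map. A generic sketch of the idiom:

#include <algorithm>
#include <cstddef>
#include <vector>

// Generic parallel-vector cache; fine while the number of distinct keys
// stays small.
template <class Key, class Value, class MakeValue>
Value& FindOrEmplace(std::vector<Key>& keys, std::vector<Value>& values,
                     const Key& key, MakeValue&& make_value) {
    const auto it = std::find(keys.begin(), keys.end(), key);
    if (it != keys.end()) {
        return values[static_cast<std::size_t>(it - keys.begin())];
    }
    keys.push_back(key);
    values.push_back(make_value());
    return values.back();
}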
| 538 | |||
| 539 | VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) { | ||
| 540 | if (blit_depth_stencil_pipeline) { | ||
| 541 | return *blit_depth_stencil_pipeline; | ||
| 542 | } | ||
| 543 | const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag); | ||
| 544 | blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({ | ||
| 545 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 546 | .pNext = nullptr, | ||
| 547 | .flags = 0, | ||
| 548 | .stageCount = static_cast<u32>(stages.size()), | ||
| 549 | .pStages = stages.data(), | ||
| 550 | .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 551 | .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 552 | .pTessellationState = nullptr, | ||
| 553 | .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 554 | .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 555 | .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 556 | .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, | ||
| 557 | .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, | ||
| 558 | .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 559 | .layout = *two_textures_pipeline_layout, | ||
| 560 | .renderPass = renderpass, | ||
| 561 | .subpass = 0, | ||
| 562 | .basePipelineHandle = VK_NULL_HANDLE, | ||
| 563 | .basePipelineIndex = 0, | ||
| 564 | }); | ||
| 565 | return *blit_depth_stencil_pipeline; | ||
| 566 | } | ||
| 567 | |||
| 568 | void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { | ||
| 569 | if (pipeline) { | ||
| 570 | return; | ||
| 571 | } | ||
| 572 | const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag); | ||
| 573 | pipeline = device.GetLogical().CreateGraphicsPipeline({ | ||
| 574 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 575 | .pNext = nullptr, | ||
| 576 | .flags = 0, | ||
| 577 | .stageCount = static_cast<u32>(stages.size()), | ||
| 578 | .pStages = stages.data(), | ||
| 579 | .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 580 | .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 581 | .pTessellationState = nullptr, | ||
| 582 | .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 583 | .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 584 | .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 585 | .pDepthStencilState = nullptr, | ||
| 586 | .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, | ||
| 587 | .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 588 | .layout = *one_texture_pipeline_layout, | ||
| 589 | .renderPass = renderpass, | ||
| 590 | .subpass = 0, | ||
| 591 | .basePipelineHandle = VK_NULL_HANDLE, | ||
| 592 | .basePipelineIndex = 0, | ||
| 593 | }); | ||
| 594 | } | ||
| 595 | |||
| 596 | void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { | ||
| 597 | if (pipeline) { | ||
| 598 | return; | ||
| 599 | } | ||
| 600 | const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag); | ||
| 601 | pipeline = device.GetLogical().CreateGraphicsPipeline({ | ||
| 602 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 603 | .pNext = nullptr, | ||
| 604 | .flags = 0, | ||
| 605 | .stageCount = static_cast<u32>(stages.size()), | ||
| 606 | .pStages = stages.data(), | ||
| 607 | .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 608 | .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 609 | .pTessellationState = nullptr, | ||
| 610 | .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 611 | .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 612 | .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 613 | .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, | ||
| 614 | .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, | ||
| 615 | .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 616 | .layout = *one_texture_pipeline_layout, | ||
| 617 | .renderPass = renderpass, | ||
| 618 | .subpass = 0, | ||
| 619 | .basePipelineHandle = VK_NULL_HANDLE, | ||
| 620 | .basePipelineIndex = 0, | ||
| 621 | }); | ||
| 622 | } | ||
| 623 | |||
| 624 | } // namespace Vulkan | ||
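The Convert*Pipeline helpers above use the pipeline handle itself as the "already built" flag: the early return on a truthy handle means the first call pays the creation cost and every later call is free. A trivial sketch of that lazy-creation idiom, assuming the handle's boolean conversion means "created":

    // Hypothetical handle type standing in for vk::Pipeline; zero means
    // "not yet created".
    #include <cstdint>

    using Handle = std::uint64_t;

    Handle CreatePipelineOnce(Handle& pipeline) {
        if (pipeline) {
            return pipeline; // built on an earlier call
        }
        pipeline = 0xABCDu; // stand-in for CreateGraphicsPipeline(...)
        return pipeline;
    }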
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h new file mode 100644 index 000000000..2c2790bf9 --- /dev/null +++ b/src/video_core/renderer_vulkan/blit_image.h | |||
| @@ -0,0 +1,97 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <compare> | ||
| 8 | |||
| 9 | #include "video_core/engines/fermi_2d.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 11 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 12 | #include "video_core/texture_cache/types.h" | ||
| 13 | |||
| 14 | namespace Vulkan { | ||
| 15 | |||
| 16 | using VideoCommon::Offset2D; | ||
| 17 | |||
| 18 | class VKDevice; | ||
| 19 | class VKScheduler; | ||
| 20 | class StateTracker; | ||
| 21 | |||
| 22 | class Framebuffer; | ||
| 23 | class ImageView; | ||
| 24 | |||
| 25 | struct BlitImagePipelineKey { | ||
| 26 | constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default; | ||
| 27 | |||
| 28 | VkRenderPass renderpass; | ||
| 29 | Tegra::Engines::Fermi2D::Operation operation; | ||
| 30 | }; | ||
| 31 | |||
| 32 | class BlitImageHelper { | ||
| 33 | public: | ||
| 34 | explicit BlitImageHelper(const VKDevice& device, VKScheduler& scheduler, | ||
| 35 | StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); | ||
| 36 | ~BlitImageHelper(); | ||
| 37 | |||
| 38 | void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, | ||
| 39 | const std::array<Offset2D, 2>& dst_region, | ||
| 40 | const std::array<Offset2D, 2>& src_region, | ||
| 41 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 42 | Tegra::Engines::Fermi2D::Operation operation); | ||
| 43 | |||
| 44 | void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, | ||
| 45 | VkImageView src_stencil_view, const std::array<Offset2D, 2>& dst_region, | ||
| 46 | const std::array<Offset2D, 2>& src_region, | ||
| 47 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 48 | Tegra::Engines::Fermi2D::Operation operation); | ||
| 49 | |||
| 50 | void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); | ||
| 51 | |||
| 52 | void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); | ||
| 53 | |||
| 54 | void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); | ||
| 55 | |||
| 56 | void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); | ||
| 57 | |||
| 58 | private: | ||
| 59 | void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, | ||
| 60 | const ImageView& src_image_view); | ||
| 61 | |||
| 62 | [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key); | ||
| 63 | |||
| 64 | [[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass); | ||
| 65 | |||
| 66 | void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); | ||
| 67 | |||
| 68 | void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); | ||
| 69 | |||
| 70 | const VKDevice& device; | ||
| 71 | VKScheduler& scheduler; | ||
| 72 | StateTracker& state_tracker; | ||
| 73 | |||
| 74 | vk::DescriptorSetLayout one_texture_set_layout; | ||
| 75 | vk::DescriptorSetLayout two_textures_set_layout; | ||
| 76 | DescriptorAllocator one_texture_descriptor_allocator; | ||
| 77 | DescriptorAllocator two_textures_descriptor_allocator; | ||
| 78 | vk::PipelineLayout one_texture_pipeline_layout; | ||
| 79 | vk::PipelineLayout two_textures_pipeline_layout; | ||
| 80 | vk::ShaderModule full_screen_vert; | ||
| 81 | vk::ShaderModule blit_color_to_color_frag; | ||
| 82 | vk::ShaderModule blit_depth_stencil_frag; | ||
| 83 | vk::ShaderModule convert_depth_to_float_frag; | ||
| 84 | vk::ShaderModule convert_float_to_depth_frag; | ||
| 85 | vk::Sampler linear_sampler; | ||
| 86 | vk::Sampler nearest_sampler; | ||
| 87 | |||
| 88 | std::vector<BlitImagePipelineKey> blit_color_keys; | ||
| 89 | std::vector<vk::Pipeline> blit_color_pipelines; | ||
| 90 | vk::Pipeline blit_depth_stencil_pipeline; | ||
| 91 | vk::Pipeline convert_d32_to_r32_pipeline; | ||
| 92 | vk::Pipeline convert_r32_to_d32_pipeline; | ||
| 93 | vk::Pipeline convert_d16_to_r16_pipeline; | ||
| 94 | vk::Pipeline convert_r16_to_d16_pipeline; | ||
| 95 | }; | ||
| 96 | |||
| 97 | } // namespace Vulkan | ||
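BlitImagePipelineKey relies on a defaulted operator<=>, which in C++20 also implicitly declares a defaulted operator==; that equality is what std::ranges::find uses in FindOrEmplacePipeline. A self-contained sketch with a hypothetical key type:

    // A defaulted three-way comparison on an aggregate gives both ordering
    // and equality, member by member in declaration order.
    #include <compare>

    struct DemoKey {
        int renderpass;
        int operation;
        constexpr auto operator<=>(const DemoKey&) const noexcept = default;
    };

    static_assert(DemoKey{1, 2} == DemoKey{1, 2}); // implicit operator==
    static_assert(DemoKey{1, 2} < DemoKey{1, 3});  // rewritten from operator<=>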
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 5ec43db11..67dd10500 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | |||
| @@ -60,6 +60,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta | |||
| 60 | logic_op.Assign(PackLogicOp(regs.logic_op.operation)); | 60 | logic_op.Assign(PackLogicOp(regs.logic_op.operation)); |
| 61 | rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); | 61 | rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); |
| 62 | topology.Assign(regs.draw.topology); | 62 | topology.Assign(regs.draw.topology); |
| 63 | msaa_mode.Assign(regs.multisample_mode); | ||
| 63 | 64 | ||
| 64 | raw2 = 0; | 65 | raw2 = 0; |
| 65 | const auto test_func = | 66 | const auto test_func = |
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index c26b77790..7e95e6fce 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h | |||
| @@ -186,6 +186,7 @@ struct FixedPipelineState { | |||
| 186 | BitField<19, 4, u32> logic_op; | 186 | BitField<19, 4, u32> logic_op; |
| 187 | BitField<23, 1, u32> rasterize_enable; | 187 | BitField<23, 1, u32> rasterize_enable; |
| 188 | BitField<24, 4, Maxwell::PrimitiveTopology> topology; | 188 | BitField<24, 4, Maxwell::PrimitiveTopology> topology; |
| 189 | BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; | ||
| 189 | }; | 190 | }; |
| 190 | union { | 191 | union { |
| 191 | u32 raw2; | 192 | u32 raw2; |
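The new msaa_mode field takes bits [28, 32) of raw1: logic_op ends at bit 23, rasterize_enable is bit 23, topology occupies bits 24..27, so the added 4-bit field fills the 32-bit word exactly. A hand-rolled sketch of the same packing that BitField performs, assuming a 4-bit mode value:

    // Equivalent manual packing for a 4-bit field at bit offset 28.
    #include <cstdint>

    constexpr std::uint32_t PackMsaaMode(std::uint32_t raw1, std::uint32_t msaa_mode) {
        constexpr std::uint32_t shift = 28;
        constexpr std::uint32_t mask = 0xFu << shift;
        return (raw1 & ~mask) | ((msaa_mode << shift) & mask);
    }

    static_assert(PackMsaaMode(0, 0xF) == 0xF000'0000u);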
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 58e117eb3..4c988429f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -122,7 +122,7 @@ struct FormatTuple { | |||
| 122 | {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT | 122 | {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT |
| 123 | {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT | 123 | {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT |
| 124 | {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM | 124 | {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM |
| 125 | {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM | 125 | {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5_UNORM |
| 126 | {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM | 126 | {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM |
| 127 | {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM | 127 | {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM |
| 128 | {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT | 128 | {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT |
| @@ -163,7 +163,7 @@ struct FormatTuple { | |||
| 163 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM | 163 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM |
| 164 | {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT | 164 | {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT |
| 165 | {VK_FORMAT_UNDEFINED}, // R16G16_UINT | 165 | {VK_FORMAT_UNDEFINED}, // R16G16_UINT |
| 166 | {VK_FORMAT_UNDEFINED}, // R16G16_SINT | 166 | {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT |
| 167 | {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM | 167 | {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM |
| 168 | {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT | 168 | {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT |
| 169 | {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB | 169 | {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB |
| @@ -233,18 +233,20 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo | |||
| 233 | 233 | ||
| 234 | // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively | 234 | // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively |
| 235 | if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { | 235 | if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { |
| 236 | tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) | 236 | const bool is_srgb = VideoCore::Surface::IsPixelFormatSRGB(pixel_format); |
| 237 | ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 | 237 | tuple.format = is_srgb ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 : VK_FORMAT_A8B8G8R8_UNORM_PACK32; |
| 238 | : VK_FORMAT_A8B8G8R8_UNORM_PACK32; | ||
| 239 | } | 238 | } |
| 240 | const bool attachable = tuple.usage & Attachable; | 239 | const bool attachable = tuple.usage & Attachable; |
| 241 | const bool storage = tuple.usage & Storage; | 240 | const bool storage = tuple.usage & Storage; |
| 242 | 241 | ||
| 243 | VkFormatFeatureFlags usage; | 242 | VkFormatFeatureFlags usage{}; |
| 244 | if (format_type == FormatType::Buffer) { | 243 | switch (format_type) { |
| 244 | case FormatType::Buffer: | ||
| 245 | usage = | 245 | usage = |
| 246 | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; | 246 | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; |
| 247 | } else { | 247 | break; |
| 248 | case FormatType::Linear: | ||
| 249 | case FormatType::Optimal: | ||
| 248 | usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | | 250 | usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | |
| 249 | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT; | 251 | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT; |
| 250 | if (attachable) { | 252 | if (attachable) { |
| @@ -254,6 +256,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo | |||
| 254 | if (storage) { | 256 | if (storage) { |
| 255 | usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; | 257 | usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; |
| 256 | } | 258 | } |
| 259 | break; | ||
| 257 | } | 260 | } |
| 258 | return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; | 261 | return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; |
| 259 | } | 262 | } |
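Note that usage is now value-initialized and assigned through an exhaustive switch over FormatType, so every control path leaves it defined. A compact sketch of the idiom, with hypothetical flag values in place of the VK_FORMAT_FEATURE_* bits:

    // Value-initialize first, then switch exhaustively; the variable is
    // defined even if a future enumerator is missed.
    #include <cstdint>

    enum class FormatType { Buffer, Linear, Optimal };

    constexpr std::uint32_t UsageFor(FormatType type, bool attachable, bool storage) {
        std::uint32_t usage{}; // never read uninitialized
        switch (type) {
        case FormatType::Buffer:
            usage = 0x1u; // texel-buffer features
            break;
        case FormatType::Linear:
        case FormatType::Optimal:
            usage = 0x2u; // sampled + transfer features
            if (attachable) {
                usage |= 0x4u;
            }
            if (storage) {
                usage |= 0x8u;
            }
            break;
        }
        return usage;
    }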
| @@ -724,4 +727,17 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) | |||
| 724 | return {}; | 727 | return {}; |
| 725 | } | 728 | } |
| 726 | 729 | ||
| 730 | VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction) { | ||
| 731 | switch (reduction) { | ||
| 732 | case Tegra::Texture::SamplerReduction::WeightedAverage: | ||
| 733 | return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; | ||
| 734 | case Tegra::Texture::SamplerReduction::Min: | ||
| 735 | return VK_SAMPLER_REDUCTION_MODE_MIN_EXT; | ||
| 736 | case Tegra::Texture::SamplerReduction::Max: | ||
| 737 | return VK_SAMPLER_REDUCTION_MODE_MAX_EXT; | ||
| 738 | } | ||
| 739 | UNREACHABLE_MSG("Invalid sampler mode={}", static_cast<int>(reduction)); | ||
| 740 | return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; | ||
| 741 | } | ||
| 742 | |||
| 727 | } // namespace Vulkan::MaxwellToVK | 743 | } // namespace Vulkan::MaxwellToVK |
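SamplerReduction maps the Tegra reduction filter onto the VK_EXT_sampler_filter_minmax modes. A sketch of where the returned value would typically be consumed, chaining it into sampler creation through pNext; the surrounding create-info fields here are illustrative defaults, not the project's actual sampler setup:

    // Minimal sketch: attach a reduction mode to a sampler (requires
    // VK_EXT_sampler_filter_minmax).
    #include <vulkan/vulkan.h>

    VkResult CreateMinMaxSampler(VkDevice device, VkSamplerReductionMode reduction,
                                 VkSampler* out_sampler) {
        const VkSamplerReductionModeCreateInfoEXT reduction_ci{
            .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT,
            .pNext = nullptr,
            .reductionMode = reduction,
        };
        const VkSamplerCreateInfo ci{
            .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
            .pNext = &reduction_ci, // the extension struct rides on pNext
            .flags = 0,
            .magFilter = VK_FILTER_LINEAR,
            .minFilter = VK_FILTER_LINEAR,
            .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
            .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
            .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
            .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
            .mipLodBias = 0.0f,
            .anisotropyEnable = VK_FALSE,
            .maxAnisotropy = 1.0f,
            .compareEnable = VK_FALSE,
            .compareOp = VK_COMPARE_OP_NEVER,
            .minLod = 0.0f,
            .maxLod = 0.0f,
            .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
            .unnormalizedCoordinates = VK_FALSE,
        };
        return vkCreateSampler(device, &ci, nullptr, out_sampler);
    }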
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 7e213452f..1a90f192e 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h | |||
| @@ -61,4 +61,6 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); | |||
| 61 | 61 | ||
| 62 | VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); | 62 | VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); |
| 63 | 63 | ||
| 64 | VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); | ||
| 65 | |||
| 64 | } // namespace Vulkan::MaxwellToVK | 66 | } // namespace Vulkan::MaxwellToVK |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index ea4b7c1e6..7f521cb9b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -92,9 +92,9 @@ Common::DynamicLibrary OpenVulkanLibrary() { | |||
| 92 | return library; | 92 | return library; |
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | std::pair<vk::Instance, u32> CreateInstance( | 95 | std::pair<vk::Instance, u32> CreateInstance(Common::DynamicLibrary& library, |
| 96 | Common::DynamicLibrary& library, vk::InstanceDispatch& dld, | 96 | vk::InstanceDispatch& dld, WindowSystemType window_type, |
| 97 | WindowSystemType window_type = WindowSystemType::Headless, bool enable_layers = false) { | 97 | bool enable_debug_utils, bool enable_layers) { |
| 98 | if (!library.IsOpen()) { | 98 | if (!library.IsOpen()) { |
| 99 | LOG_ERROR(Render_Vulkan, "Vulkan library not available"); | 99 | LOG_ERROR(Render_Vulkan, "Vulkan library not available"); |
| 100 | return {}; | 100 | return {}; |
| @@ -133,7 +133,7 @@ std::pair<vk::Instance, u32> CreateInstance( | |||
| 133 | if (window_type != Core::Frontend::WindowSystemType::Headless) { | 133 | if (window_type != Core::Frontend::WindowSystemType::Headless) { |
| 134 | extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); | 134 | extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); |
| 135 | } | 135 | } |
| 136 | if (enable_layers) { | 136 | if (enable_debug_utils) { |
| 137 | extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); | 137 | extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); |
| 138 | } | 138 | } |
| 139 | extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); | 139 | extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); |
| @@ -287,7 +287,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 287 | bool RendererVulkan::Init() { | 287 | bool RendererVulkan::Init() { |
| 288 | library = OpenVulkanLibrary(); | 288 | library = OpenVulkanLibrary(); |
| 289 | std::tie(instance, instance_version) = CreateInstance( | 289 | std::tie(instance, instance_version) = CreateInstance( |
| 290 | library, dld, render_window.GetWindowInfo().type, Settings::values.renderer_debug); | 290 | library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug); |
| 291 | if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { | 291 | if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { |
| 292 | return false; | 292 | return false; |
| 293 | } | 293 | } |
| @@ -447,7 +447,8 @@ void RendererVulkan::Report() const { | |||
| 447 | std::vector<std::string> RendererVulkan::EnumerateDevices() { | 447 | std::vector<std::string> RendererVulkan::EnumerateDevices() { |
| 448 | vk::InstanceDispatch dld; | 448 | vk::InstanceDispatch dld; |
| 449 | Common::DynamicLibrary library = OpenVulkanLibrary(); | 449 | Common::DynamicLibrary library = OpenVulkanLibrary(); |
| 450 | vk::Instance instance = CreateInstance(library, dld).first; | 450 | vk::Instance instance = |
| 451 | CreateInstance(library, dld, WindowSystemType::Headless, false, false).first; | ||
| 451 | if (!instance) { | 452 | if (!instance) { |
| 452 | return {}; | 453 | return {}; |
| 453 | } | 454 | } |
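CreateInstance now takes enable_debug_utils and enable_layers as separate parameters, so EnumerateDevices can request neither while the renderer can ask for the debug-utils messenger independently of validation layers. A sketch of the decoupled selection; the KHRONOS validation layer name is an assumption, not taken from this diff:

    // Hypothetical helper: pick instance extensions and layers from two
    // independent flags.
    #include <vector>
    #include <vulkan/vulkan.h>

    struct InstanceAdditions {
        std::vector<const char*> extensions;
        std::vector<const char*> layers;
    };

    InstanceAdditions SelectInstanceAdditions(bool enable_debug_utils, bool enable_layers) {
        InstanceAdditions additions;
        if (enable_debug_utils) {
            additions.extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
        }
        if (enable_layers) {
            additions.layers.push_back("VK_LAYER_KHRONOS_validation");
        }
        return additions;
    }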
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 977b86003..74642fba4 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h | |||
| @@ -33,10 +33,9 @@ class VKDevice; | |||
| 33 | class VKMemoryManager; | 33 | class VKMemoryManager; |
| 34 | class VKSwapchain; | 34 | class VKSwapchain; |
| 35 | class VKScheduler; | 35 | class VKScheduler; |
| 36 | class VKImage; | ||
| 37 | 36 | ||
| 38 | struct VKScreenInfo { | 37 | struct VKScreenInfo { |
| 39 | VKImage* image{}; | 38 | VkImageView image_view{}; |
| 40 | u32 width{}; | 39 | u32 width{}; |
| 41 | u32 height{}; | 40 | u32 height{}; |
| 42 | bool is_srgb{}; | 41 | bool is_srgb{}; |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index b5b60309e..d3a83f22f 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp | |||
| @@ -16,12 +16,12 @@ | |||
| 16 | #include "core/frontend/emu_window.h" | 16 | #include "core/frontend/emu_window.h" |
| 17 | #include "core/memory.h" | 17 | #include "core/memory.h" |
| 18 | #include "video_core/gpu.h" | 18 | #include "video_core/gpu.h" |
| 19 | #include "video_core/morton.h" | 19 | #include "video_core/host_shaders/vulkan_present_frag_spv.h" |
| 20 | #include "video_core/host_shaders/vulkan_present_vert_spv.h" | ||
| 20 | #include "video_core/rasterizer_interface.h" | 21 | #include "video_core/rasterizer_interface.h" |
| 21 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 22 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 22 | #include "video_core/renderer_vulkan/vk_blit_screen.h" | 23 | #include "video_core/renderer_vulkan/vk_blit_screen.h" |
| 23 | #include "video_core/renderer_vulkan/vk_device.h" | 24 | #include "video_core/renderer_vulkan/vk_device.h" |
| 24 | #include "video_core/renderer_vulkan/vk_image.h" | ||
| 25 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | 25 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" |
| 26 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 26 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 27 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 27 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| @@ -29,108 +29,12 @@ | |||
| 29 | #include "video_core/renderer_vulkan/vk_swapchain.h" | 29 | #include "video_core/renderer_vulkan/vk_swapchain.h" |
| 30 | #include "video_core/renderer_vulkan/wrapper.h" | 30 | #include "video_core/renderer_vulkan/wrapper.h" |
| 31 | #include "video_core/surface.h" | 31 | #include "video_core/surface.h" |
| 32 | #include "video_core/textures/decoders.h" | ||
| 32 | 33 | ||
| 33 | namespace Vulkan { | 34 | namespace Vulkan { |
| 34 | 35 | ||
| 35 | namespace { | 36 | namespace { |
| 36 | 37 | ||
| 37 | // Generated from the "shaders/" directory, read the instructions there. | ||
| 38 | constexpr u8 blit_vertex_code[] = { | ||
| 39 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x27, 0x00, 0x00, 0x00, | ||
| 40 | 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, | ||
| 41 | 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, | ||
| 42 | 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 43 | 0x0f, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, | ||
| 44 | 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, | ||
| 45 | 0x25, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 46 | 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 47 | 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, | ||
| 48 | 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 49 | 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 50 | 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 51 | 0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, | ||
| 52 | 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 53 | 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 54 | 0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00, | ||
| 55 | 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 56 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, | ||
| 57 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 58 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 59 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 60 | 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, | ||
| 61 | 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 62 | 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 63 | 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 64 | 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 65 | 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 66 | 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 67 | 0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 68 | 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, | ||
| 69 | 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, | ||
| 70 | 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, | ||
| 71 | 0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00, | ||
| 72 | 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, | ||
| 73 | 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, | ||
| 74 | 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, | ||
| 75 | 0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, | ||
| 76 | 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, | ||
| 77 | 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, | ||
| 78 | 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, | ||
| 79 | 0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 80 | 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 81 | 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, | ||
| 82 | 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 83 | 0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 84 | 0x24, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, | ||
| 85 | 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 86 | 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, | ||
| 87 | 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 88 | 0x13, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, | ||
| 89 | 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, | ||
| 90 | 0x1a, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 91 | 0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, | ||
| 92 | 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 93 | 0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, | ||
| 94 | 0x1e, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00, | ||
| 95 | 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, | ||
| 96 | 0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 97 | 0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 98 | 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||
| 99 | 0x3e, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, | ||
| 100 | 0x38, 0x00, 0x01, 0x00}; | ||
| 101 | |||
| 102 | constexpr u8 blit_fragment_code[] = { | ||
| 103 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00, | ||
| 104 | 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, | ||
| 105 | 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, | ||
| 106 | 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 107 | 0x0f, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, | ||
| 108 | 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, | ||
| 109 | 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 110 | 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 111 | 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 112 | 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, | ||
| 113 | 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 114 | 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, | ||
| 115 | 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 116 | 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, | ||
| 117 | 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, | ||
| 118 | 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x00, | ||
| 119 | 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 120 | 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x03, 0x00, | ||
| 121 | 0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, | ||
| 122 | 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, | ||
| 123 | 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, | ||
| 124 | 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, | ||
| 125 | 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, | ||
| 126 | 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 127 | 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, | ||
| 128 | 0x05, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, | ||
| 129 | 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, | ||
| 130 | 0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, | ||
| 131 | 0x0e, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 132 | 0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; | ||
| 133 | |||
| 134 | struct ScreenRectVertex { | 38 | struct ScreenRectVertex { |
| 135 | ScreenRectVertex() = default; | 39 | ScreenRectVertex() = default; |
| 136 | explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} | 40 | explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} |
| @@ -173,9 +77,9 @@ constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) { | |||
| 173 | // clang-format on | 77 | // clang-format on |
| 174 | } | 78 | } |
| 175 | 79 | ||
| 176 | std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { | 80 | u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { |
| 177 | using namespace VideoCore::Surface; | 81 | using namespace VideoCore::Surface; |
| 178 | return GetBytesPerPixel(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); | 82 | return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); |
| 179 | } | 83 | } |
| 180 | 84 | ||
| 181 | std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { | 85 | std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { |
| @@ -239,34 +143,30 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 239 | scheduler.Wait(resource_ticks[image_index]); | 143 | scheduler.Wait(resource_ticks[image_index]); |
| 240 | resource_ticks[image_index] = scheduler.CurrentTick(); | 144 | resource_ticks[image_index] = scheduler.CurrentTick(); |
| 241 | 145 | ||
| 242 | VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get(); | 146 | UpdateDescriptorSet(image_index, |
| 243 | 147 | use_accelerated ? screen_info.image_view : *raw_image_views[image_index]); | |
| 244 | UpdateDescriptorSet(image_index, blit_image->GetPresentView()); | ||
| 245 | 148 | ||
| 246 | BufferData data; | 149 | BufferData data; |
| 247 | SetUniformData(data, framebuffer); | 150 | SetUniformData(data, framebuffer); |
| 248 | SetVertexData(data, framebuffer); | 151 | SetVertexData(data, framebuffer); |
| 249 | 152 | ||
| 250 | auto map = buffer_commit->Map(); | 153 | auto map = buffer_commit->Map(); |
| 251 | std::memcpy(map.GetAddress(), &data, sizeof(data)); | 154 | std::memcpy(map.Address(), &data, sizeof(data)); |
| 252 | 155 | ||
| 253 | if (!use_accelerated) { | 156 | if (!use_accelerated) { |
| 254 | const u64 image_offset = GetRawImageOffset(framebuffer, image_index); | 157 | const u64 image_offset = GetRawImageOffset(framebuffer, image_index); |
| 255 | 158 | ||
| 256 | const auto pixel_format = | ||
| 257 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); | ||
| 258 | const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; | 159 | const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; |
| 259 | const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr); | 160 | const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); |
| 260 | rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer)); | 161 | const size_t size_bytes = GetSizeInBytes(framebuffer); |
| 162 | rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes); | ||
| 261 | 163 | ||
| 262 | // TODO(Rodrigo): Read this from HLE | 164 | // TODO(Rodrigo): Read this from HLE |
| 263 | constexpr u32 block_height_log2 = 4; | 165 | constexpr u32 block_height_log2 = 4; |
| 264 | VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, | 166 | const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); |
| 265 | framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, | 167 | Tegra::Texture::UnswizzleTexture( |
| 266 | map.GetAddress() + image_offset, host_ptr); | 168 | std::span(map.Address() + image_offset, size_bytes), std::span(host_ptr, size_bytes), |
| 267 | 169 | bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); | |
| 268 | blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 269 | VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); | ||
| 270 | 170 | ||
| 271 | const VkBufferImageCopy copy{ | 171 | const VkBufferImageCopy copy{ |
| 272 | .bufferOffset = image_offset, | 172 | .bufferOffset = image_offset, |
| @@ -288,15 +188,44 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 288 | }, | 188 | }, |
| 289 | }; | 189 | }; |
| 290 | scheduler.Record( | 190 | scheduler.Record( |
| 291 | [buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) { | 191 | [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { |
| 292 | cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); | 192 | const VkImageMemoryBarrier base_barrier{ |
| 193 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 194 | .pNext = nullptr, | ||
| 195 | .srcAccessMask = 0, | ||
| 196 | .dstAccessMask = 0, | ||
| 197 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 198 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 199 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 200 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 201 | .image = image, | ||
| 202 | .subresourceRange = | ||
| 203 | { | ||
| 204 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 205 | .baseMipLevel = 0, | ||
| 206 | .levelCount = 1, | ||
| 207 | .baseArrayLayer = 0, | ||
| 208 | .layerCount = 1, | ||
| 209 | }, | ||
| 210 | }; | ||
| 211 | VkImageMemoryBarrier read_barrier = base_barrier; | ||
| 212 | read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; | ||
| 213 | read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | ||
| 214 | read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; | ||
| 215 | |||
| 216 | VkImageMemoryBarrier write_barrier = base_barrier; | ||
| 217 | write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | ||
| 218 | write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; | ||
| 219 | |||
| 220 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 221 | 0, read_barrier); | ||
| 222 | cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); | ||
| 223 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 224 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); | ||
| 293 | }); | 225 | }); |
| 294 | } | 226 | } |
| 295 | map.Release(); | 227 | map.Release(); |
| 296 | 228 | ||
| 297 | blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, | ||
| 298 | VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); | ||
| 299 | |||
| 300 | scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], | 229 | scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], |
| 301 | descriptor_set = descriptor_sets[image_index], buffer = *buffer, | 230 | descriptor_set = descriptor_sets[image_index], buffer = *buffer, |
| 302 | size = swapchain.GetSize(), pipeline = *pipeline, | 231 | size = swapchain.GetSize(), pipeline = *pipeline, |
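With VKImage and its Transition tracking removed, the blit screen keeps its raw images in VK_IMAGE_LAYOUT_GENERAL and brackets the staging copy with two explicit barriers: host write to transfer write before the copy, transfer write to shader read after it. A minimal standalone sketch of the same chain, assuming a single-mip, single-layer color image:

    // Sketch of the layout-free upload above; the image never leaves
    // VK_IMAGE_LAYOUT_GENERAL once initialized.
    #include <vulkan/vulkan.h>

    void UploadWithBarriers(VkCommandBuffer cmdbuf, VkBuffer staging, VkImage image,
                            const VkBufferImageCopy& copy) {
        VkImageMemoryBarrier barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_HOST_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
            .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, // contents are fully overwritten
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = image,
            .subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1},
        };
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_HOST_BIT,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1,
                             &barrier);
        vkCmdCopyBufferToImage(cmdbuf, staging, image, VK_IMAGE_LAYOUT_GENERAL, 1, &copy);

        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                             VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr,
                             1, &barrier);
    }

Staying in GENERAL trades a little theoretical image-access performance for not having to track layouts per subresource, which is what the deleted VKImage class existed to do.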
| @@ -304,31 +233,31 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 304 | const VkClearValue clear_color{ | 233 | const VkClearValue clear_color{ |
| 305 | .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}}, | 234 | .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}}, |
| 306 | }; | 235 | }; |
| 307 | 236 | const VkRenderPassBeginInfo renderpass_bi{ | |
| 308 | VkRenderPassBeginInfo renderpass_bi; | 237 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, |
| 309 | renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; | 238 | .pNext = nullptr, |
| 310 | renderpass_bi.pNext = nullptr; | 239 | .renderPass = renderpass, |
| 311 | renderpass_bi.renderPass = renderpass; | 240 | .framebuffer = framebuffer, |
| 312 | renderpass_bi.framebuffer = framebuffer; | 241 | .renderArea = |
| 313 | renderpass_bi.renderArea.offset.x = 0; | 242 | { |
| 314 | renderpass_bi.renderArea.offset.y = 0; | 243 | .offset = {0, 0}, |
| 315 | renderpass_bi.renderArea.extent = size; | 244 | .extent = size, |
| 316 | renderpass_bi.clearValueCount = 1; | 245 | }, |
| 317 | renderpass_bi.pClearValues = &clear_color; | 246 | .clearValueCount = 1, |
| 318 | 247 | .pClearValues = &clear_color, | |
| 319 | VkViewport viewport; | 248 | }; |
| 320 | viewport.x = 0.0f; | 249 | const VkViewport viewport{ |
| 321 | viewport.y = 0.0f; | 250 | .x = 0.0f, |
| 322 | viewport.width = static_cast<float>(size.width); | 251 | .y = 0.0f, |
| 323 | viewport.height = static_cast<float>(size.height); | 252 | .width = static_cast<float>(size.width), |
| 324 | viewport.minDepth = 0.0f; | 253 | .height = static_cast<float>(size.height), |
| 325 | viewport.maxDepth = 1.0f; | 254 | .minDepth = 0.0f, |
| 326 | 255 | .maxDepth = 1.0f, | |
| 327 | VkRect2D scissor; | 256 | }; |
| 328 | scissor.offset.x = 0; | 257 | const VkRect2D scissor{ |
| 329 | scissor.offset.y = 0; | 258 | .offset = {0, 0}, |
| 330 | scissor.extent = size; | 259 | .extent = size, |
| 331 | 260 | }; | |
| 332 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); | 261 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); |
| 333 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | 262 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); |
| 334 | cmdbuf.SetViewport(0, viewport); | 263 | cmdbuf.SetViewport(0, viewport); |
| @@ -372,8 +301,8 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) | |||
| 372 | } | 301 | } |
| 373 | 302 | ||
| 374 | void VKBlitScreen::CreateShaders() { | 303 | void VKBlitScreen::CreateShaders() { |
| 375 | vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code); | 304 | vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); |
| 376 | fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code); | 305 | fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); |
| 377 | } | 306 | } |
| 378 | 307 | ||
| 379 | void VKBlitScreen::CreateSemaphores() { | 308 | void VKBlitScreen::CreateSemaphores() { |
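The embedded byte arrays are replaced by headers generated from video_core/host_shaders, so CreateShaders simply forwards the named SPIR-V constants. A sketch of a BuildShader-style wrapper, assuming the generated headers expose a 32-bit word array:

    // Hypothetical wrapper around vkCreateShaderModule for a generated
    // SPIR-V word array.
    #include <cstdint>
    #include <span>
    #include <vulkan/vulkan.h>

    VkShaderModule BuildShaderModule(VkDevice device, std::span<const std::uint32_t> code) {
        const VkShaderModuleCreateInfo ci{
            .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
            .pNext = nullptr,
            .flags = 0,
            .codeSize = code.size_bytes(), // in bytes, even though pCode is a u32 pointer
            .pCode = code.data(),
        };
        VkShaderModule module = VK_NULL_HANDLE;
        vkCreateShaderModule(device, &ci, nullptr, &module);
        return module;
    }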
| @@ -420,7 +349,7 @@ void VKBlitScreen::CreateRenderPass() { | |||
| 420 | 349 | ||
| 421 | const VkAttachmentReference color_attachment_ref{ | 350 | const VkAttachmentReference color_attachment_ref{ |
| 422 | .attachment = 0, | 351 | .attachment = 0, |
| 423 | .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, | 352 | .layout = VK_IMAGE_LAYOUT_GENERAL, |
| 424 | }; | 353 | }; |
| 425 | 354 | ||
| 426 | const VkSubpassDescription subpass_description{ | 355 | const VkSubpassDescription subpass_description{ |
| @@ -735,34 +664,56 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff | |||
| 735 | 664 | ||
| 736 | void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { | 665 | void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { |
| 737 | raw_images.resize(image_count); | 666 | raw_images.resize(image_count); |
| 667 | raw_image_views.resize(image_count); | ||
| 738 | raw_buffer_commits.resize(image_count); | 668 | raw_buffer_commits.resize(image_count); |
| 739 | 669 | ||
| 740 | const VkImageCreateInfo ci{ | 670 | for (size_t i = 0; i < image_count; ++i) { |
| 741 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | 671 | raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ |
| 742 | .pNext = nullptr, | 672 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, |
| 743 | .flags = 0, | 673 | .pNext = nullptr, |
| 744 | .imageType = VK_IMAGE_TYPE_2D, | 674 | .flags = 0, |
| 745 | .format = GetFormat(framebuffer), | 675 | .imageType = VK_IMAGE_TYPE_2D, |
| 746 | .extent = | 676 | .format = GetFormat(framebuffer), |
| 747 | { | 677 | .extent = |
| 748 | .width = framebuffer.width, | 678 | { |
| 749 | .height = framebuffer.height, | 679 | .width = framebuffer.width, |
| 750 | .depth = 1, | 680 | .height = framebuffer.height, |
| 751 | }, | 681 | .depth = 1, |
| 752 | .mipLevels = 1, | 682 | }, |
| 753 | .arrayLayers = 1, | 683 | .mipLevels = 1, |
| 754 | .samples = VK_SAMPLE_COUNT_1_BIT, | 684 | .arrayLayers = 1, |
| 755 | .tiling = VK_IMAGE_TILING_LINEAR, | 685 | .samples = VK_SAMPLE_COUNT_1_BIT, |
| 756 | .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, | 686 | .tiling = VK_IMAGE_TILING_LINEAR, |
| 757 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 687 | .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, |
| 758 | .queueFamilyIndexCount = 0, | 688 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 759 | .pQueueFamilyIndices = nullptr, | 689 | .queueFamilyIndexCount = 0, |
| 760 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | 690 | .pQueueFamilyIndices = nullptr, |
| 761 | }; | 691 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| 762 | 692 | }); | |
| 763 | for (std::size_t i = 0; i < image_count; ++i) { | 693 | raw_buffer_commits[i] = memory_manager.Commit(raw_images[i], false); |
| 764 | raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT); | 694 | raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ |
| 765 | raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false); | 695 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| 696 | .pNext = nullptr, | ||
| 697 | .flags = 0, | ||
| 698 | .image = *raw_images[i], | ||
| 699 | .viewType = VK_IMAGE_VIEW_TYPE_2D, | ||
| 700 | .format = GetFormat(framebuffer), | ||
| 701 | .components = | ||
| 702 | { | ||
| 703 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 704 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 705 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 706 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 707 | }, | ||
| 708 | .subresourceRange = | ||
| 709 | { | ||
| 710 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 711 | .baseMipLevel = 0, | ||
| 712 | .levelCount = 1, | ||
| 713 | .baseArrayLayer = 0, | ||
| 714 | .layerCount = 1, | ||
| 715 | }, | ||
| 716 | }); | ||
| 766 | } | 717 | } |
| 767 | } | 718 | } |
| 768 | 719 | ||
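Each raw image is created, committed to memory, and only then wrapped in a view. The ordering matters: for a non-sparse image, Vulkan requires the image to be bound to memory before a view is created from it, which is why the memory_manager.Commit call sits between the two creation calls. A sketch of the view half of that sequence, with the bound-memory precondition called out:

    // Hypothetical helper for the per-image view creation above; the image
    // must already have memory bound when this is called.
    #include <vulkan/vulkan.h>

    VkImageView MakeLinearRgbaView(VkDevice device, VkImage image, VkFormat format) {
        const VkImageViewCreateInfo ci{
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .pNext = nullptr,
            .flags = 0,
            .image = image, // precondition: memory is bound
            .viewType = VK_IMAGE_VIEW_TYPE_2D,
            .format = format,
            .components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
                           VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY},
            .subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1},
        };
        VkImageView view = VK_NULL_HANDLE;
        vkCreateImageView(device, &ci, nullptr, &view);
        return view;
    }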
| @@ -789,7 +740,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag | |||
| 789 | const VkDescriptorImageInfo image_info{ | 740 | const VkDescriptorImageInfo image_info{ |
| 790 | .sampler = *sampler, | 741 | .sampler = *sampler, |
| 791 | .imageView = image_view, | 742 | .imageView = image_view, |
| 792 | .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, | 743 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 793 | }; | 744 | }; |
| 794 | 745 | ||
| 795 | const VkWriteDescriptorSet sampler_write{ | 746 | const VkWriteDescriptorSet sampler_write{ |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 8f2839214..2ee374247 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h | |||
| @@ -35,7 +35,6 @@ struct ScreenInfo; | |||
| 35 | 35 | ||
| 36 | class RasterizerVulkan; | 36 | class RasterizerVulkan; |
| 37 | class VKDevice; | 37 | class VKDevice; |
| 38 | class VKImage; | ||
| 39 | class VKScheduler; | 38 | class VKScheduler; |
| 40 | class VKSwapchain; | 39 | class VKSwapchain; |
| 41 | 40 | ||
| @@ -110,7 +109,8 @@ private: | |||
| 110 | std::vector<u64> resource_ticks; | 109 | std::vector<u64> resource_ticks; |
| 111 | 110 | ||
| 112 | std::vector<vk::Semaphore> semaphores; | 111 | std::vector<vk::Semaphore> semaphores; |
| 113 | std::vector<std::unique_ptr<VKImage>> raw_images; | 112 | std::vector<vk::Image> raw_images; |
| 113 | std::vector<vk::ImageView> raw_image_views; | ||
| 114 | std::vector<VKMemoryCommit> raw_buffer_commits; | 114 | std::vector<VKMemoryCommit> raw_buffer_commits; |
| 115 | u32 raw_width = 0; | 115 | u32 raw_width = 0; |
| 116 | u32 raw_height = 0; | 116 | u32 raw_height = 0; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 444d3fb93..10d296c2f 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -31,15 +31,19 @@ constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS = | |||
| 31 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | | 31 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | |
| 32 | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; | 32 | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; |
| 33 | 33 | ||
| 34 | constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = | ||
| 35 | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; | ||
| 36 | |||
| 34 | std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { | 37 | std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { |
| 35 | return std::make_unique<VKStreamBuffer>(device, scheduler, BUFFER_USAGE); | 38 | return std::make_unique<VKStreamBuffer>(device, scheduler); |
| 36 | } | 39 | } |
| 37 | 40 | ||
| 38 | } // Anonymous namespace | 41 | } // Anonymous namespace |
| 39 | 42 | ||
| 40 | Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, | 43 | Buffer::Buffer(const VKDevice& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_, |
| 41 | VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) | 44 | VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) |
| 42 | : BufferBlock{cpu_addr_, size_}, scheduler{scheduler_}, staging_pool{staging_pool_} { | 45 | : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ |
| 46 | staging_pool_} { | ||
| 43 | const VkBufferCreateInfo ci{ | 47 | const VkBufferCreateInfo ci{ |
| 44 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 48 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 45 | .pNext = nullptr, | 49 | .pNext = nullptr, |
| @@ -64,24 +68,39 @@ void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { | |||
| 64 | scheduler.RequestOutsideRenderPassOperationContext(); | 68 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 65 | 69 | ||
| 66 | const VkBuffer handle = Handle(); | 70 | const VkBuffer handle = Handle(); |
| 67 | scheduler.Record( | 71 | scheduler.Record([staging = *staging.handle, handle, offset, data_size, |
| 68 | [staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) { | 72 | &device = device](vk::CommandBuffer cmdbuf) { |
| 69 | cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); | 73 | const VkBufferMemoryBarrier read_barrier{ |
| 70 | 74 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | |
| 71 | const VkBufferMemoryBarrier barrier{ | 75 | .pNext = nullptr, |
| 72 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | 76 | .srcAccessMask = |
| 73 | .pNext = nullptr, | 77 | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | |
| 74 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 78 | VK_ACCESS_HOST_WRITE_BIT | |
| 75 | .dstAccessMask = UPLOAD_ACCESS_BARRIERS, | 79 | (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0), |
| 76 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 80 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, |
| 77 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 81 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 78 | .buffer = handle, | 82 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 79 | .offset = offset, | 83 | .buffer = handle, |
| 80 | .size = data_size, | 84 | .offset = offset, |
| 81 | }; | 85 | .size = data_size, |
| 82 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, | 86 | }; |
| 83 | barrier, {}); | 87 | const VkBufferMemoryBarrier write_barrier{ |
| 84 | }); | 88 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, |
| 89 | .pNext = nullptr, | ||
| 90 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 91 | .dstAccessMask = UPLOAD_ACCESS_BARRIERS, | ||
| 92 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 93 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 94 | .buffer = handle, | ||
| 95 | .offset = offset, | ||
| 96 | .size = data_size, | ||
| 97 | }; | ||
| 98 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 99 | 0, read_barrier); | ||
| 100 | cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); | ||
| 101 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, | ||
| 102 | write_barrier); | ||
| 103 | }); | ||
| 85 | } | 104 | } |
| 86 | 105 | ||
| 87 | void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { | 106 | void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { |
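Buffer::Upload now brackets the copy the same way the blit screen does: a broad pre-copy barrier waits on any prior writer of the destination range (shader, transfer, host, plus transform feedback when the extension is available), then a post-copy barrier publishes the data to every consumer in UPLOAD_ACCESS_BARRIERS. A standalone sketch of the sequence:

    // Sketch of the bracketed buffer upload above; stage and access masks
    // mirror the diff, the function shape is hypothetical.
    #include <vulkan/vulkan.h>

    void UploadRange(VkCommandBuffer cmdbuf, VkBuffer staging, VkBuffer dest,
                     VkDeviceSize offset, VkDeviceSize size, bool transform_feedback) {
        const VkAccessFlags tf_write =
            transform_feedback ? VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT |
                                     VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
                               : VkAccessFlags{0};
        VkBufferMemoryBarrier barrier{
            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
                             VK_ACCESS_HOST_WRITE_BIT | tf_write,
            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .buffer = dest,
            .offset = offset,
            .size = size,
        };
        // Wait for any earlier writer before the transfer overwrites the range.
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 1, &barrier, 0,
                             nullptr);
        const VkBufferCopy copy{0, offset, size};
        vkCmdCopyBuffer(cmdbuf, staging, dest, 1, &copy);
        // Publish the transfer write to all upload consumers.
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT |
                                VK_ACCESS_UNIFORM_READ_BIT |
                                VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                             VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 1, &barrier,
                             0, nullptr);
    }

The pre-copy barrier is the new part: the old code only ordered the copy against later readers, which missed hazards with writes still in flight from earlier commands.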
| @@ -150,8 +169,10 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst | |||
| 150 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, | 169 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, |
| 151 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 170 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, |
| 152 | const VKDevice& device_, VKMemoryManager& memory_manager_, | 171 | const VKDevice& device_, VKMemoryManager& memory_manager_, |
| 153 | VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_) | 172 | VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_, |
| 154 | : BufferCache{rasterizer_, gpu_memory_, cpu_memory_, CreateStreamBuffer(device_, scheduler_)}, | 173 | VKStagingBufferPool& staging_pool_) |
| 174 | : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_, | ||
| 175 | cpu_memory_, stream_buffer_}, | ||
| 155 | device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ | 176 | device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ |
| 156 | staging_pool_} {} | 177 | staging_pool_} {} |
| 157 | 178 | ||
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 6008b8373..daf498222 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -41,6 +41,7 @@ public: | |||
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | private: | 43 | private: |
| 44 | const VKDevice& device; | ||
| 44 | VKScheduler& scheduler; | 45 | VKScheduler& scheduler; |
| 45 | VKStagingBufferPool& staging_pool; | 46 | VKStagingBufferPool& staging_pool; |
| 46 | 47 | ||
| @@ -49,10 +50,11 @@ private: | |||
| 49 | 50 | ||
| 50 | class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { | 51 | class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { |
| 51 | public: | 52 | public: |
| 52 | explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, | 53 | explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, |
| 53 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 54 | Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, |
| 54 | const VKDevice& device_, VKMemoryManager& memory_manager_, | 55 | const VKDevice& device, VKMemoryManager& memory_manager, |
| 55 | VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_); | 56 | VKScheduler& scheduler, VKStreamBuffer& stream_buffer, |
| 57 | VKStagingBufferPool& staging_pool); | ||
| 56 | ~VKBufferCache(); | 58 | ~VKBufferCache(); |
| 57 | 59 | ||
| 58 | BufferInfo GetEmptyBuffer(std::size_t size) override; | 60 | BufferInfo GetEmptyBuffer(std::size_t size) override; |
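The constructor change in both hunks swaps the internally built stream buffer (the removed CreateStreamBuffer call) for a caller-supplied VKStreamBuffer&, so the renderer can construct one stream buffer and share it between consumers. The shape of the change, reduced to a hypothetical minimal example (type names are stand-ins, not yuzu's):

// Hypothetical reduction of the change: the cache no longer builds its own
// stream buffer, it borrows one owned by the renderer.
class StreamBuffer {};

class BufferCacheBase {
public:
    explicit BufferCacheBase(StreamBuffer& stream_buffer_)
        : stream_buffer{stream_buffer_} {}

private:
    StreamBuffer& stream_buffer; // non-owning; the renderer manages lifetime
};

class VulkanBufferCache : public BufferCacheBase {
public:
    explicit VulkanBufferCache(StreamBuffer& stream_buffer)
        : BufferCacheBase{stream_buffer} {} // inject instead of construct
};

Sharing one stream buffer this way avoids each cache allocating its own arena and keeps ownership in a single place.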
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 1ac7e2a30..2c030e910 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -10,6 +10,9 @@ | |||
| 10 | #include "common/alignment.h" | 10 | #include "common/alignment.h" |
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/host_shaders/vulkan_quad_array_comp_spv.h" | ||
| 14 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | ||
| 15 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 16 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 17 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 15 | #include "video_core/renderer_vulkan/vk_device.h" | 18 | #include "video_core/renderer_vulkan/vk_device.h" |
| @@ -22,99 +25,6 @@ namespace Vulkan { | |||
| 22 | 25 | ||
| 23 | namespace { | 26 | namespace { |
| 24 | 27 | ||
| 25 | // Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there. | ||
| 26 | constexpr u8 quad_array[] = { | ||
| 27 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00, | ||
| 28 | 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, | ||
| 29 | 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, | ||
| 30 | 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 31 | 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, | ||
| 32 | 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 33 | 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 34 | 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, | ||
| 35 | 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 36 | 0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 37 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 38 | 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 39 | 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 40 | 0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 41 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 42 | 0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, | ||
| 43 | 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 44 | 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 45 | 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 46 | 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 47 | 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 48 | 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 49 | 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, | ||
| 50 | 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 51 | 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, | ||
| 52 | 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 53 | 0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 54 | 0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, | ||
| 55 | 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, | ||
| 56 | 0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, | ||
| 57 | 0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 58 | 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, | ||
| 59 | 0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 60 | 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 61 | 0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 62 | 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 63 | 0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, | ||
| 64 | 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 65 | 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 66 | 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 67 | 0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, | ||
| 68 | 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, | ||
| 69 | 0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 70 | 0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 71 | 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, | ||
| 72 | 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, | ||
| 73 | 0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, | ||
| 74 | 0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, | ||
| 75 | 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 76 | 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, | ||
| 77 | 0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, | ||
| 78 | 0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00, | ||
| 79 | 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, | ||
| 80 | 0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 81 | 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 82 | 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, | ||
| 83 | 0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, | ||
| 84 | 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, | ||
| 85 | 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, | ||
| 86 | 0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, | ||
| 87 | 0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, | ||
| 88 | 0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 89 | 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, | ||
| 90 | 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, | ||
| 91 | 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, | ||
| 92 | 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00, | ||
| 93 | 0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 94 | 0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, | ||
| 95 | 0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, | ||
| 96 | 0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, | ||
| 97 | 0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, | ||
| 98 | 0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, | ||
| 99 | 0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 100 | 0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 101 | 0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 102 | 0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, | ||
| 103 | 0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, | ||
| 104 | 0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, | ||
| 105 | 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, | ||
| 106 | 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, | ||
| 107 | 0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, | ||
| 108 | 0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00, | ||
| 109 | 0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, | ||
| 110 | 0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 111 | 0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, | ||
| 112 | 0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 113 | 0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00, | ||
| 114 | 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, | ||
| 115 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, | ||
| 116 | }; | ||
| 117 | |||
| 118 | VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { | 28 | VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { |
| 119 | return { | 29 | return { |
| 120 | .binding = 0, | 30 | .binding = 0, |
| @@ -144,208 +54,6 @@ VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | |||
| 144 | }; | 54 | }; |
| 145 | } | 55 | } |
| 146 | 56 | ||
| 147 | // Uint8 SPIR-V module. Generated from the "shaders/" directory. | ||
| 148 | constexpr u8 uint8_pass[] = { | ||
| 149 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00, | ||
| 150 | 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, | ||
| 151 | 0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, | ||
| 152 | 0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, | ||
| 153 | 0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f, | ||
| 154 | 0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, | ||
| 155 | 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, | ||
| 156 | 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, | ||
| 157 | 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, | ||
| 158 | 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 159 | 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, | ||
| 160 | 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 161 | 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, | ||
| 162 | 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, | ||
| 163 | 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, | ||
| 164 | 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, | ||
| 165 | 0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 166 | 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 167 | 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00, | ||
| 168 | 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 169 | 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 170 | 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, | ||
| 171 | 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 172 | 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 173 | 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00, | ||
| 174 | 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 175 | 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, | ||
| 176 | 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 177 | 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, | ||
| 178 | 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 179 | 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, | ||
| 180 | 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, | ||
| 181 | 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 182 | 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, | ||
| 183 | 0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, | ||
| 184 | 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, | ||
| 185 | 0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 186 | 0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 187 | 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 188 | 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, | ||
| 189 | 0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, | ||
| 190 | 0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 191 | 0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 192 | 0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 193 | 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 194 | 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 195 | 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 196 | 0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, | ||
| 197 | 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, | ||
| 198 | 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, | ||
| 199 | 0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, | ||
| 200 | 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 201 | 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 202 | 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 203 | 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 204 | 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, | ||
| 205 | 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 206 | 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 207 | 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, | ||
| 208 | 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, | ||
| 209 | 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, | ||
| 210 | 0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, | ||
| 211 | 0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, | ||
| 212 | 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, | ||
| 213 | 0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, | ||
| 214 | 0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||
| 215 | 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, | ||
| 216 | 0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 217 | 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00, | ||
| 218 | 0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, | ||
| 219 | 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 220 | 0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, | ||
| 221 | 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, | ||
| 222 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, | ||
| 223 | }; | ||
| 224 | |||
| 225 | // Quad indexed SPIR-V module. Generated from the "shaders/" directory. | ||
| 226 | constexpr u8 QUAD_INDEXED_SPV[] = { | ||
| 227 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00, | ||
| 228 | 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, | ||
| 229 | 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, | ||
| 230 | 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 231 | 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, | ||
| 232 | 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 233 | 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 234 | 0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, | ||
| 235 | 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 236 | 0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, | ||
| 237 | 0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 238 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 239 | 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 240 | 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 241 | 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 242 | 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 243 | 0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 244 | 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 245 | 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 246 | 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 247 | 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, | ||
| 248 | 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 249 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, | ||
| 250 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 251 | 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, | ||
| 252 | 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 253 | 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 254 | 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 255 | 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, | ||
| 256 | 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 257 | 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 258 | 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 259 | 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, | ||
| 260 | 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 261 | 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 262 | 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 263 | 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, | ||
| 264 | 0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 265 | 0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 266 | 0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 267 | 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 268 | 0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 269 | 0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 270 | 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, | ||
| 271 | 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 272 | 0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 273 | 0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 274 | 0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00, | ||
| 275 | 0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 276 | 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 277 | 0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00, | ||
| 278 | 0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, | ||
| 279 | 0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 280 | 0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, | ||
| 281 | 0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, | ||
| 282 | 0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 283 | 0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, | ||
| 284 | 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 285 | 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 286 | 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, | ||
| 287 | 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, | ||
| 288 | 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, | ||
| 289 | 0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, | ||
| 290 | 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 291 | 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00, | ||
| 292 | 0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, | ||
| 293 | 0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00, | ||
| 294 | 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, | ||
| 295 | 0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00, | ||
| 296 | 0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 297 | 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, | ||
| 298 | 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, | ||
| 299 | 0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, | ||
| 300 | 0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, | ||
| 301 | 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, | ||
| 302 | 0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, | ||
| 303 | 0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 304 | 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, | ||
| 305 | 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, | ||
| 306 | 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, | ||
| 307 | 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||
| 308 | 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, | ||
| 309 | 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, | ||
| 310 | 0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, | ||
| 311 | 0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 312 | 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, | ||
| 313 | 0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||
| 314 | 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, | ||
| 315 | 0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 316 | 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, | ||
| 317 | 0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, | ||
| 318 | 0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 319 | 0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, | ||
| 320 | 0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 321 | 0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, | ||
| 322 | 0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 323 | 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 324 | 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 325 | 0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, | ||
| 326 | 0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, | ||
| 327 | 0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, | ||
| 328 | 0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 329 | 0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, | ||
| 330 | 0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, | ||
| 331 | 0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00, | ||
| 332 | 0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, | ||
| 333 | 0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, | ||
| 334 | 0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 335 | 0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, | ||
| 336 | 0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, | ||
| 337 | 0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, | ||
| 338 | 0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, | ||
| 339 | 0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00, | ||
| 340 | 0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, | ||
| 341 | 0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 342 | 0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||
| 343 | 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00, | ||
| 344 | 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00, | ||
| 345 | 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, | ||
| 346 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, | ||
| 347 | }; | ||
| 348 | |||
| 349 | std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { | 57 | std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { |
| 350 | return {{ | 58 | return {{ |
| 351 | { | 59 | { |
| @@ -381,8 +89,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { | |||
| 381 | VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, | 89 | VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, |
| 382 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | 90 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 383 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, | 91 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, |
| 384 | vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, | 92 | vk::Span<VkPushConstantRange> push_constants, |
| 385 | const u8* code) { | 93 | std::span<const u32> code) { |
| 386 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ | 94 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ |
| 387 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | 95 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, |
| 388 | .pNext = nullptr, | 96 | .pNext = nullptr, |
| @@ -390,7 +98,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto | |||
| 390 | .bindingCount = bindings.size(), | 98 | .bindingCount = bindings.size(), |
| 391 | .pBindings = bindings.data(), | 99 | .pBindings = bindings.data(), |
| 392 | }); | 100 | }); |
| 393 | |||
| 394 | layout = device.GetLogical().CreatePipelineLayout({ | 101 | layout = device.GetLogical().CreatePipelineLayout({ |
| 395 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | 102 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, |
| 396 | .pNext = nullptr, | 103 | .pNext = nullptr, |
| @@ -400,7 +107,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto | |||
| 400 | .pushConstantRangeCount = push_constants.size(), | 107 | .pushConstantRangeCount = push_constants.size(), |
| 401 | .pPushConstantRanges = push_constants.data(), | 108 | .pPushConstantRanges = push_constants.data(), |
| 402 | }); | 109 | }); |
| 403 | |||
| 404 | if (!templates.empty()) { | 110 | if (!templates.empty()) { |
| 405 | descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ | 111 | descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ |
| 406 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, | 112 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, |
| @@ -417,18 +123,13 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto | |||
| 417 | 123 | ||
| 418 | descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); | 124 | descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); |
| 419 | } | 125 | } |
| 420 | |||
| 421 | auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1); | ||
| 422 | std::memcpy(code_copy.get(), code, code_size); | ||
| 423 | |||
| 424 | module = device.GetLogical().CreateShaderModule({ | 126 | module = device.GetLogical().CreateShaderModule({ |
| 425 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | 127 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| 426 | .pNext = nullptr, | 128 | .pNext = nullptr, |
| 427 | .flags = 0, | 129 | .flags = 0, |
| 428 | .codeSize = code_size, | 130 | .codeSize = static_cast<u32>(code.size_bytes()), |
| 429 | .pCode = code_copy.get(), | 131 | .pCode = code.data(), |
| 430 | }); | 132 | }); |
| 431 | |||
| 432 | pipeline = device.GetLogical().CreateComputePipeline({ | 133 | pipeline = device.GetLogical().CreateComputePipeline({ |
| 433 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | 134 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, |
| 434 | .pNext = nullptr, | 135 | .pNext = nullptr, |
| @@ -467,7 +168,7 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, | |||
| 467 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 168 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| 468 | : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), | 169 | : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), |
| 469 | BuildQuadArrayPassDescriptorUpdateTemplateEntry(), | 170 | BuildQuadArrayPassDescriptorUpdateTemplateEntry(), |
| 470 | BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), | 171 | BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV), |
| 471 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | 172 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |
| 472 | update_descriptor_queue{update_descriptor_queue_} {} | 173 | update_descriptor_queue{update_descriptor_queue_} {} |
| 473 | 174 | ||
| @@ -510,12 +211,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 | |||
| 510 | return {*buffer.handle, 0}; | 211 | return {*buffer.handle, 0}; |
| 511 | } | 212 | } |
| 512 | 213 | ||
| 513 | Uint8Pass::Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_, | 214 | Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler_, |
| 514 | VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, | 215 | VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_, |
| 515 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 216 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| 516 | : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), | 217 | : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), |
| 517 | BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), | 218 | BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), |
| 518 | uint8_pass), | ||
| 519 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | 219 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |
| 520 | update_descriptor_queue{update_descriptor_queue_} {} | 220 | update_descriptor_queue{update_descriptor_queue_} {} |
| 521 | 221 | ||
| @@ -561,8 +261,7 @@ QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler | |||
| 561 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 261 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| 562 | : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), | 262 | : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), |
| 563 | BuildInputOutputDescriptorUpdateTemplate(), | 263 | BuildInputOutputDescriptorUpdateTemplate(), |
| 564 | BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), | 264 | BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), |
| 565 | QUAD_INDEXED_SPV), | ||
| 566 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | 265 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |
| 567 | update_descriptor_queue{update_descriptor_queue_} {} | 266 | update_descriptor_queue{update_descriptor_queue_} {} |
| 568 | 267 | ||
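VkShaderModuleCreateInfo::pCode requires 32-bit-aligned SPIR-V words, which is why the old constructor, taking a const u8*, had to memcpy the byte array into freshly allocated u32 storage. With the build now emitting the modules as u32 data, a std::span<const u32> can be passed straight through: size_bytes() supplies codeSize and data() is already correctly aligned. A minimal sketch, assuming a valid VkDevice:

#include <span>
#include <vulkan/vulkan.h>

// SPIR-V words go straight into pCode with no intermediate copy, since the
// span already has u32 alignment.
VkShaderModule CreateModule(VkDevice device, std::span<const uint32_t> code) {
    const VkShaderModuleCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .codeSize = code.size_bytes(), // byte count, must be a multiple of 4
        .pCode = code.data(),
    };
    VkShaderModule module = VK_NULL_HANDLE;
    vkCreateShaderModule(device, &ci, nullptr, &module);
    return module;
}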
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 2dc87902c..abdf61e2c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <optional> | 7 | #include <optional> |
| 8 | #include <span> | ||
| 8 | #include <utility> | 9 | #include <utility> |
| 9 | 10 | ||
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| @@ -24,8 +25,7 @@ public: | |||
| 24 | explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, | 25 | explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, |
| 25 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | 26 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 26 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, | 27 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, |
| 27 | vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, | 28 | vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); |
| 28 | const u8* code); | ||
| 29 | ~VKComputePass(); | 29 | ~VKComputePass(); |
| 30 | 30 | ||
| 31 | protected: | 31 | protected: |
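The vulkan_*_comp_spv.h headers included at the top of vk_compute_pass.cpp are generated by the build from the GLSL compute shaders; presumably each one exposes the compiled module as a constexpr u32 array, which converts implicitly to the std::span<const u32> parameter above. A hypothetical illustration of what such a header could look like (the identifier is real, the word count and contents are not):

// Hypothetical contents of a generated host-shader header; the real file is
// emitted by the build from the .comp source, so the size and data differ.
#pragma once

#include <array>
#include <cstdint>

constexpr std::array<uint32_t, 188> VULKAN_UINT8_COMP_SPV{
    0x07230203, 0x00010000, /* remaining SPIR-V words omitted */
};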
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index ce3846195..370a63f74 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -46,6 +46,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ | |||
| 46 | VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, | 46 | VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, |
| 47 | VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, | 47 | VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, |
| 48 | VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, | 48 | VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, |
| 49 | VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, | ||
| 49 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, | 50 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, |
| 50 | VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, | 51 | VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, |
| 51 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, | 52 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, |
| @@ -122,6 +123,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( | |||
| 122 | VK_FORMAT_R16G16_UNORM, | 123 | VK_FORMAT_R16G16_UNORM, |
| 123 | VK_FORMAT_R16G16_SNORM, | 124 | VK_FORMAT_R16G16_SNORM, |
| 124 | VK_FORMAT_R16G16_SFLOAT, | 125 | VK_FORMAT_R16G16_SFLOAT, |
| 126 | VK_FORMAT_R16G16_SINT, | ||
| 125 | VK_FORMAT_R16_UNORM, | 127 | VK_FORMAT_R16_UNORM, |
| 126 | VK_FORMAT_R16_UINT, | 128 | VK_FORMAT_R16_UINT, |
| 127 | VK_FORMAT_R8G8B8A8_SRGB, | 129 | VK_FORMAT_R8G8B8A8_SRGB, |
| @@ -161,18 +163,32 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( | |||
| 161 | VK_FORMAT_BC2_SRGB_BLOCK, | 163 | VK_FORMAT_BC2_SRGB_BLOCK, |
| 162 | VK_FORMAT_BC3_SRGB_BLOCK, | 164 | VK_FORMAT_BC3_SRGB_BLOCK, |
| 163 | VK_FORMAT_BC7_SRGB_BLOCK, | 165 | VK_FORMAT_BC7_SRGB_BLOCK, |
| 166 | VK_FORMAT_ASTC_4x4_UNORM_BLOCK, | ||
| 164 | VK_FORMAT_ASTC_4x4_SRGB_BLOCK, | 167 | VK_FORMAT_ASTC_4x4_SRGB_BLOCK, |
| 165 | VK_FORMAT_ASTC_8x8_SRGB_BLOCK, | 168 | VK_FORMAT_ASTC_5x4_UNORM_BLOCK, |
| 166 | VK_FORMAT_ASTC_8x5_SRGB_BLOCK, | ||
| 167 | VK_FORMAT_ASTC_5x4_SRGB_BLOCK, | 169 | VK_FORMAT_ASTC_5x4_SRGB_BLOCK, |
| 168 | VK_FORMAT_ASTC_5x5_UNORM_BLOCK, | 170 | VK_FORMAT_ASTC_5x5_UNORM_BLOCK, |
| 169 | VK_FORMAT_ASTC_5x5_SRGB_BLOCK, | 171 | VK_FORMAT_ASTC_5x5_SRGB_BLOCK, |
| 170 | VK_FORMAT_ASTC_10x8_UNORM_BLOCK, | 172 | VK_FORMAT_ASTC_6x5_UNORM_BLOCK, |
| 171 | VK_FORMAT_ASTC_10x8_SRGB_BLOCK, | 173 | VK_FORMAT_ASTC_6x5_SRGB_BLOCK, |
| 172 | VK_FORMAT_ASTC_6x6_UNORM_BLOCK, | 174 | VK_FORMAT_ASTC_6x6_UNORM_BLOCK, |
| 173 | VK_FORMAT_ASTC_6x6_SRGB_BLOCK, | 175 | VK_FORMAT_ASTC_6x6_SRGB_BLOCK, |
| 176 | VK_FORMAT_ASTC_8x5_UNORM_BLOCK, | ||
| 177 | VK_FORMAT_ASTC_8x5_SRGB_BLOCK, | ||
| 178 | VK_FORMAT_ASTC_8x6_UNORM_BLOCK, | ||
| 179 | VK_FORMAT_ASTC_8x6_SRGB_BLOCK, | ||
| 180 | VK_FORMAT_ASTC_8x8_UNORM_BLOCK, | ||
| 181 | VK_FORMAT_ASTC_8x8_SRGB_BLOCK, | ||
| 182 | VK_FORMAT_ASTC_10x5_UNORM_BLOCK, | ||
| 183 | VK_FORMAT_ASTC_10x5_SRGB_BLOCK, | ||
| 184 | VK_FORMAT_ASTC_10x6_UNORM_BLOCK, | ||
| 185 | VK_FORMAT_ASTC_10x6_SRGB_BLOCK, | ||
| 186 | VK_FORMAT_ASTC_10x8_UNORM_BLOCK, | ||
| 187 | VK_FORMAT_ASTC_10x8_SRGB_BLOCK, | ||
| 174 | VK_FORMAT_ASTC_10x10_UNORM_BLOCK, | 188 | VK_FORMAT_ASTC_10x10_UNORM_BLOCK, |
| 175 | VK_FORMAT_ASTC_10x10_SRGB_BLOCK, | 189 | VK_FORMAT_ASTC_10x10_SRGB_BLOCK, |
| 190 | VK_FORMAT_ASTC_12x10_UNORM_BLOCK, | ||
| 191 | VK_FORMAT_ASTC_12x10_SRGB_BLOCK, | ||
| 176 | VK_FORMAT_ASTC_12x12_UNORM_BLOCK, | 192 | VK_FORMAT_ASTC_12x12_UNORM_BLOCK, |
| 177 | VK_FORMAT_ASTC_12x12_SRGB_BLOCK, | 193 | VK_FORMAT_ASTC_12x12_SRGB_BLOCK, |
| 178 | VK_FORMAT_ASTC_8x6_UNORM_BLOCK, | 194 | VK_FORMAT_ASTC_8x6_UNORM_BLOCK, |
| @@ -192,7 +208,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( | |||
| 192 | 208 | ||
| 193 | VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_, | 209 | VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_, |
| 194 | VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) | 210 | VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) |
| 195 | : dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, | 211 | : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, |
| 196 | instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} { | 212 | instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} { |
| 197 | SetupFamilies(surface); | 213 | SetupFamilies(surface); |
| 198 | SetupFeatures(); | 214 | SetupFeatures(); |
| @@ -214,7 +230,7 @@ bool VKDevice::Create() { | |||
| 214 | features2.features = { | 230 | features2.features = { |
| 215 | .robustBufferAccess = false, | 231 | .robustBufferAccess = false, |
| 216 | .fullDrawIndexUint32 = false, | 232 | .fullDrawIndexUint32 = false, |
| 217 | .imageCubeArray = false, | 233 | .imageCubeArray = true, |
| 218 | .independentBlend = true, | 234 | .independentBlend = true, |
| 219 | .geometryShader = true, | 235 | .geometryShader = true, |
| 220 | .tessellationShader = true, | 236 | .tessellationShader = true, |
| @@ -242,7 +258,7 @@ bool VKDevice::Create() { | |||
| 242 | .shaderTessellationAndGeometryPointSize = false, | 258 | .shaderTessellationAndGeometryPointSize = false, |
| 243 | .shaderImageGatherExtended = true, | 259 | .shaderImageGatherExtended = true, |
| 244 | .shaderStorageImageExtendedFormats = false, | 260 | .shaderStorageImageExtendedFormats = false, |
| 245 | .shaderStorageImageMultisample = false, | 261 | .shaderStorageImageMultisample = true, |
| 246 | .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, | 262 | .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, |
| 247 | .shaderStorageImageWriteWithoutFormat = true, | 263 | .shaderStorageImageWriteWithoutFormat = true, |
| 248 | .shaderUniformBufferArrayDynamicIndexing = false, | 264 | .shaderUniformBufferArrayDynamicIndexing = false, |
| @@ -268,7 +284,6 @@ bool VKDevice::Create() { | |||
| 268 | .variableMultisampleRate = false, | 284 | .variableMultisampleRate = false, |
| 269 | .inheritedQueries = false, | 285 | .inheritedQueries = false, |
| 270 | }; | 286 | }; |
| 271 | |||
| 272 | VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ | 287 | VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ |
| 273 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, | 288 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, |
| 274 | .pNext = nullptr, | 289 | .pNext = nullptr, |
| @@ -380,6 +395,20 @@ bool VKDevice::Create() { | |||
| 380 | LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); | 395 | LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); |
| 381 | } | 396 | } |
| 382 | 397 | ||
| 398 | VkPhysicalDeviceRobustness2FeaturesEXT robustness2; | ||
| 399 | if (ext_robustness2) { | ||
| 400 | robustness2 = { | ||
| 401 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, | ||
| 402 | .pNext = nullptr, | ||
| 403 | .robustBufferAccess2 = false, | ||
| 404 | .robustImageAccess2 = true, | ||
| 405 | .nullDescriptor = true, | ||
| 406 | }; | ||
| 407 | SetNext(next, robustness2); | ||
| 408 | } else { | ||
| 409 | LOG_INFO(Render_Vulkan, "Device doesn't support robustness2"); | ||
| 410 | } | ||
| 411 | |||
| 383 | if (!ext_depth_range_unrestricted) { | 412 | if (!ext_depth_range_unrestricted) { |
| 384 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); | 413 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); |
| 385 | } | 414 | } |
| @@ -405,7 +434,14 @@ bool VKDevice::Create() { | |||
| 405 | } | 434 | } |
| 406 | 435 | ||
| 407 | CollectTelemetryParameters(); | 436 | CollectTelemetryParameters(); |
| 437 | CollectToolingInfo(); | ||
| 408 | 438 | ||
| 439 | if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { | ||
| 440 | LOG_WARNING( | ||
| 441 | Render_Vulkan, | ||
| 442 | "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); | ||
| 443 | ext_extended_dynamic_state = false; | ||
| 444 | } | ||
| 409 | if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) { | 445 | if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) { |
| 410 | // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it | 446 | // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it |
| 411 | // seems to cause stability issues | 447 | // seems to cause stability issues |
| @@ -458,7 +494,7 @@ void VKDevice::ReportLoss() const { | |||
| 458 | LOG_CRITICAL(Render_Vulkan, "Device loss occurred!"); | 494 | LOG_CRITICAL(Render_Vulkan, "Device loss occurred!"); |
| 459 | 495 | ||
| 460 | // Wait for the log to flush and for Nsight Aftermath to dump the results | 496 | // Wait for the log to flush and for Nsight Aftermath to dump the results |
| 461 | std::this_thread::sleep_for(std::chrono::seconds{3}); | 497 | std::this_thread::sleep_for(std::chrono::seconds{15}); |
| 462 | } | 498 | } |
| 463 | 499 | ||
| 464 | void VKDevice::SaveShader(const std::vector<u32>& spirv) const { | 500 | void VKDevice::SaveShader(const std::vector<u32>& spirv) const { |
| @@ -499,6 +535,16 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) | |||
| 499 | return true; | 535 | return true; |
| 500 | } | 536 | } |
| 501 | 537 | ||
| 538 | bool VKDevice::TestDepthStencilBlits() const { | ||
| 539 | static constexpr VkFormatFeatureFlags required_features = | ||
| 540 | VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; | ||
| 541 | const auto test_features = [](VkFormatProperties props) { | ||
| 542 | return (props.optimalTilingFeatures & required_features) == required_features; | ||
| 543 | }; | ||
| 544 | return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) && | ||
| 545 | test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT)); | ||
| 546 | } | ||
| 547 | |||
| 502 | bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, | 548 | bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, |
| 503 | FormatType format_type) const { | 549 | FormatType format_type) const { |
| 504 | const auto it = format_properties.find(wanted_format); | 550 | const auto it = format_properties.find(wanted_format); |
| @@ -569,6 +615,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { | |||
| 569 | const auto features{physical.GetFeatures()}; | 615 | const auto features{physical.GetFeatures()}; |
| 570 | const std::array feature_report = { | 616 | const std::array feature_report = { |
| 571 | std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), | 617 | std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), |
| 618 | std::make_pair(features.imageCubeArray, "imageCubeArray"), | ||
| 572 | std::make_pair(features.independentBlend, "independentBlend"), | 619 | std::make_pair(features.independentBlend, "independentBlend"), |
| 573 | std::make_pair(features.depthClamp, "depthClamp"), | 620 | std::make_pair(features.depthClamp, "depthClamp"), |
| 574 | std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), | 621 | std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), |
| @@ -580,6 +627,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { | |||
| 580 | std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), | 627 | std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), |
| 581 | std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), | 628 | std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), |
| 582 | std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), | 629 | std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), |
| 630 | std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"), | ||
| 583 | std::make_pair(features.shaderStorageImageWriteWithoutFormat, | 631 | std::make_pair(features.shaderStorageImageWriteWithoutFormat, |
| 584 | "shaderStorageImageWriteWithoutFormat"), | 632 | "shaderStorageImageWriteWithoutFormat"), |
| 585 | }; | 633 | }; |
| @@ -608,6 +656,7 @@ std::vector<const char*> VKDevice::LoadExtensions() { | |||
| 608 | bool has_ext_transform_feedback{}; | 656 | bool has_ext_transform_feedback{}; |
| 609 | bool has_ext_custom_border_color{}; | 657 | bool has_ext_custom_border_color{}; |
| 610 | bool has_ext_extended_dynamic_state{}; | 658 | bool has_ext_extended_dynamic_state{}; |
| 659 | bool has_ext_robustness2{}; | ||
| 611 | for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { | 660 | for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { |
| 612 | const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, | 661 | const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, |
| 613 | bool push) { | 662 | bool push) { |
| @@ -627,11 +676,15 @@ std::vector<const char*> VKDevice::LoadExtensions() { | |||
| 627 | test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); | 676 | test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); |
| 628 | test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); | 677 | test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); |
| 629 | test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); | 678 | test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); |
| 679 | test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true); | ||
| 630 | test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, | 680 | test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, |
| 631 | true); | 681 | true); |
| 682 | test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); | ||
| 683 | test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); | ||
| 632 | test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); | 684 | test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); |
| 633 | test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); | 685 | test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); |
| 634 | test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); | 686 | test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); |
| 687 | test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false); | ||
| 635 | if (instance_version >= VK_API_VERSION_1_1) { | 688 | if (instance_version >= VK_API_VERSION_1_1) { |
| 636 | test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); | 689 | test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); |
| 637 | } | 690 | } |
| @@ -733,6 +786,18 @@ std::vector<const char*> VKDevice::LoadExtensions() { | |||
| 733 | } | 786 | } |
| 734 | } | 787 | } |
| 735 | 788 | ||
| 789 | if (has_ext_robustness2) { | ||
| 790 | VkPhysicalDeviceRobustness2FeaturesEXT robustness2; | ||
| 791 | robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; | ||
| 792 | robustness2.pNext = nullptr; | ||
| 793 | features.pNext = &robustness2; | ||
| 794 | physical.GetFeatures2KHR(features); | ||
| 795 | if (robustness2.nullDescriptor && robustness2.robustImageAccess2) { | ||
| 796 | extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); | ||
| 797 | ext_robustness2 = true; | ||
| 798 | } | ||
| 799 | } | ||
| 800 | |||
| 736 | return extensions; | 801 | return extensions; |
| 737 | } | 802 | } |
| 738 | 803 | ||
| @@ -764,6 +829,7 @@ void VKDevice::SetupFamilies(VkSurfaceKHR surface) { | |||
| 764 | void VKDevice::SetupFeatures() { | 829 | void VKDevice::SetupFeatures() { |
| 765 | const auto supported_features{physical.GetFeatures()}; | 830 | const auto supported_features{physical.GetFeatures()}; |
| 766 | is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; | 831 | is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; |
| 832 | is_blit_depth_stencil_supported = TestDepthStencilBlits(); | ||
| 767 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); | 833 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); |
| 768 | } | 834 | } |
| 769 | 835 | ||
| @@ -794,6 +860,32 @@ void VKDevice::CollectTelemetryParameters() { | |||
| 794 | } | 860 | } |
| 795 | } | 861 | } |
| 796 | 862 | ||
| 863 | void VKDevice::CollectToolingInfo() { | ||
| 864 | if (!ext_tooling_info) { | ||
| 865 | return; | ||
| 866 | } | ||
| 867 | const auto vkGetPhysicalDeviceToolPropertiesEXT = | ||
| 868 | reinterpret_cast<PFN_vkGetPhysicalDeviceToolPropertiesEXT>( | ||
| 869 | dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT")); | ||
| 870 | if (!vkGetPhysicalDeviceToolPropertiesEXT) { | ||
| 871 | return; | ||
| 872 | } | ||
| 873 | u32 tool_count = 0; | ||
| 874 | if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) { | ||
| 875 | return; | ||
| 876 | } | ||
| 877 | std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(tool_count); | ||
| 878 | if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) { | ||
| 879 | return; | ||
| 880 | } | ||
| 881 | for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) { | ||
| 882 | const std::string_view name = tool.name; | ||
| 883 | LOG_INFO(Render_Vulkan, "{}", name); | ||
| 884 | has_renderdoc = has_renderdoc || name == "RenderDoc"; | ||
| 885 | has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics"; | ||
| 886 | } | ||
| 887 | } | ||
| 888 | |||
| 797 | std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { | 889 | std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { |
| 798 | static constexpr float QUEUE_PRIORITY = 1.0f; | 890 | static constexpr float QUEUE_PRIORITY = 1.0f; |
| 799 | 891 | ||
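Editor's note: the new CollectToolingInfo above uses the standard Vulkan two-call enumeration pattern: query the count, size the array, then fill it, bailing out on any non-VK_SUCCESS result. Below is a minimal standalone sketch of that same pattern; it is an illustration, not yuzu's dispatch-wrapper code, and it assumes a valid VkInstance and VkPhysicalDevice created elsewhere.

    #include <string_view>
    #include <vector>
    #include <vulkan/vulkan.h>

    bool IsRenderDocAttached(VkInstance instance, VkPhysicalDevice physical) {
        const auto pfn = reinterpret_cast<PFN_vkGetPhysicalDeviceToolPropertiesEXT>(
            vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT"));
        if (pfn == nullptr) {
            return false; // VK_EXT_tooling_info is not available
        }
        uint32_t count = 0;
        // First call: ask only for the number of attached tools.
        if (pfn(physical, &count, nullptr) != VK_SUCCESS) {
            return false;
        }
        // Second call: fill the properties array.
        std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(count);
        if (pfn(physical, &count, tools.data()) != VK_SUCCESS) {
            return false;
        }
        for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) {
            if (std::string_view{tool.name} == "RenderDoc") {
                return true;
            }
        }
        return false;
    }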
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 4286673d9..995dcfc0f 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h | |||
| @@ -157,6 +157,11 @@ public: | |||
| 157 | return is_formatless_image_load_supported; | 157 | return is_formatless_image_load_supported; |
| 158 | } | 158 | } |
| 159 | 159 | ||
| 160 | /// Returns true when blitting from and to depth stencil images is supported. | ||
| 161 | bool IsBlitDepthStencilSupported() const { | ||
| 162 | return is_blit_depth_stencil_supported; | ||
| 163 | } | ||
| 164 | |||
| 160 | /// Returns true if the device supports VK_NV_viewport_swizzle. | 165 | /// Returns true if the device supports VK_NV_viewport_swizzle. |
| 161 | bool IsNvViewportSwizzleSupported() const { | 166 | bool IsNvViewportSwizzleSupported() const { |
| 162 | return nv_viewport_swizzle; | 167 | return nv_viewport_swizzle; |
| @@ -172,6 +177,11 @@ public: | |||
| 172 | return ext_index_type_uint8; | 177 | return ext_index_type_uint8; |
| 173 | } | 178 | } |
| 174 | 179 | ||
| 180 | /// Returns true if the device supports VK_EXT_sampler_filter_minmax. | ||
| 181 | bool IsExtSamplerFilterMinmaxSupported() const { | ||
| 182 | return ext_sampler_filter_minmax; | ||
| 183 | } | ||
| 184 | |||
| 175 | /// Returns true if the device supports VK_EXT_depth_range_unrestricted. | 185 | /// Returns true if the device supports VK_EXT_depth_range_unrestricted. |
| 176 | bool IsExtDepthRangeUnrestrictedSupported() const { | 186 | bool IsExtDepthRangeUnrestrictedSupported() const { |
| 177 | return ext_depth_range_unrestricted; | 187 | return ext_depth_range_unrestricted; |
| @@ -197,6 +207,16 @@ public: | |||
| 197 | return ext_extended_dynamic_state; | 207 | return ext_extended_dynamic_state; |
| 198 | } | 208 | } |
| 199 | 209 | ||
| 210 | /// Returns true if the device supports VK_EXT_shader_stencil_export. | ||
| 211 | bool IsExtShaderStencilExportSupported() const { | ||
| 212 | return ext_shader_stencil_export; | ||
| 213 | } | ||
| 214 | |||
| 215 | /// Returns true when a known debugging tool is attached. | ||
| 216 | bool HasDebuggingToolAttached() const { | ||
| 217 | return has_renderdoc || has_nsight_graphics; | ||
| 218 | } | ||
| 219 | |||
| 200 | /// Returns the vendor name reported from Vulkan. | 220 | /// Returns the vendor name reported from Vulkan. |
| 201 | std::string_view GetVendorName() const { | 221 | std::string_view GetVendorName() const { |
| 202 | return vendor_name; | 222 | return vendor_name; |
| @@ -228,16 +248,23 @@ private: | |||
| 228 | /// Collects telemetry information from the device. | 248 | /// Collects telemetry information from the device. |
| 229 | void CollectTelemetryParameters(); | 249 | void CollectTelemetryParameters(); |
| 230 | 250 | ||
| 251 | /// Collects information about attached tools. | ||
| 252 | void CollectToolingInfo(); | ||
| 253 | |||
| 231 | /// Returns a list of queue initialization descriptors. | 254 | /// Returns a list of queue initialization descriptors. |
| 232 | std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; | 255 | std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; |
| 233 | 256 | ||
| 234 | /// Returns true if ASTC textures are natively supported. | 257 | /// Returns true if ASTC textures are natively supported. |
| 235 | bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; | 258 | bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; |
| 236 | 259 | ||
| 260 | /// Returns true if the device natively supports blitting depth stencil images. | ||
| 261 | bool TestDepthStencilBlits() const; | ||
| 262 | |||
| 237 | /// Returns true if a format is supported. | 263 | /// Returns true if a format is supported. |
| 238 | bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, | 264 | bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, |
| 239 | FormatType format_type) const; | 265 | FormatType format_type) const; |
| 240 | 266 | ||
| 267 | VkInstance instance; ///< Vulkan instance. | ||
| 241 | vk::DeviceDispatch dld; ///< Device function pointers. | 268 | vk::DeviceDispatch dld; ///< Device function pointers. |
| 242 | vk::PhysicalDevice physical; ///< Physical device. | 269 | vk::PhysicalDevice physical; ///< Physical device. |
| 243 | VkPhysicalDeviceProperties properties; ///< Device properties. | 270 | VkPhysicalDeviceProperties properties; ///< Device properties. |
| @@ -253,15 +280,22 @@ private: | |||
| 253 | bool is_float16_supported{}; ///< Support for float16 arithmetics. | 280 | bool is_float16_supported{}; ///< Support for float16 arithmetics. |
| 254 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. | 281 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. |
| 255 | bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. | 282 | bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. |
| 283 | bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. | ||
| 256 | bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. | 284 | bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. |
| 257 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. | 285 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. |
| 258 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. | 286 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. |
| 287 | bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. | ||
| 259 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. | 288 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. |
| 260 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. | 289 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. |
| 290 | bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. | ||
| 261 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. | 291 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. |
| 262 | bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. | 292 | bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. |
| 263 | bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. | 293 | bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. |
| 294 | bool ext_robustness2{}; ///< Support for VK_EXT_robustness2. | ||
| 295 | bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. | ||
| 264 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. | 296 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. |
| 297 | bool has_renderdoc{}; ///< Has RenderDoc attached | ||
| 298 | bool has_nsight_graphics{}; ///< Has Nsight Graphics attached | ||
| 265 | 299 | ||
| 266 | // Asynchronous Graphics Pipeline setting | 300 | // Asynchronous Graphics Pipeline setting |
| 267 | bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline | 301 | bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline |
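Editor's note: the robustness2 probe added to LoadExtensions earlier works by chaining the extension's feature struct into the features query through pNext before calling GetFeatures2KHR; the driver walks the chain and fills every struct it recognizes. A minimal sketch of that chaining follows, using the core Vulkan 1.1 entry point vkGetPhysicalDeviceFeatures2 instead of the project's wrapper; illustration only.

    #include <vulkan/vulkan.h>

    bool SupportsNullDescriptor(VkPhysicalDevice physical) {
        VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
        robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;

        VkPhysicalDeviceFeatures2 features{};
        features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
        features.pNext = &robustness2; // chain the extension struct into the query

        // The implementation fills `robustness2` while answering the base query.
        vkGetPhysicalDeviceFeatures2(physical, &features);
        return robustness2.nullDescriptor == VK_TRUE;
    }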
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 0bcaee714..774a12a53 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp | |||
| @@ -73,10 +73,9 @@ bool InnerFence::IsEventSignalled() const { | |||
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 75 | VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, |
| 76 | Tegra::MemoryManager& memory_manager_, | 76 | Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, |
| 77 | VKTextureCache& texture_cache_, VKBufferCache& buffer_cache_, | 77 | VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, |
| 78 | VKQueryCache& query_cache_, const VKDevice& device_, | 78 | const VKDevice& device_, VKScheduler& scheduler_) |
| 79 | VKScheduler& scheduler_) | ||
| 80 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, | 79 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, |
| 81 | device{device_}, scheduler{scheduler_} {} | 80 | device{device_}, scheduler{scheduler_} {} |
| 82 | 81 | ||
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index c8547cc24..c2869e8e3 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include "video_core/fence_manager.h" | 9 | #include "video_core/fence_manager.h" |
| 10 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 10 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 11 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 11 | #include "video_core/renderer_vulkan/wrapper.h" | 12 | #include "video_core/renderer_vulkan/wrapper.h" |
| 12 | 13 | ||
| 13 | namespace Core { | 14 | namespace Core { |
| @@ -24,7 +25,6 @@ class VKBufferCache; | |||
| 24 | class VKDevice; | 25 | class VKDevice; |
| 25 | class VKQueryCache; | 26 | class VKQueryCache; |
| 26 | class VKScheduler; | 27 | class VKScheduler; |
| 27 | class VKTextureCache; | ||
| 28 | 28 | ||
| 29 | class InnerFence : public VideoCommon::FenceBase { | 29 | class InnerFence : public VideoCommon::FenceBase { |
| 30 | public: | 30 | public: |
| @@ -51,12 +51,12 @@ private: | |||
| 51 | using Fence = std::shared_ptr<InnerFence>; | 51 | using Fence = std::shared_ptr<InnerFence>; |
| 52 | 52 | ||
| 53 | using GenericFenceManager = | 53 | using GenericFenceManager = |
| 54 | VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>; | 54 | VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>; |
| 55 | 55 | ||
| 56 | class VKFenceManager final : public GenericFenceManager { | 56 | class VKFenceManager final : public GenericFenceManager { |
| 57 | public: | 57 | public: |
| 58 | explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 58 | explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, |
| 59 | Tegra::MemoryManager& memory_manager_, VKTextureCache& texture_cache_, | 59 | Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, |
| 60 | VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, | 60 | VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, |
| 61 | const VKDevice& device_, VKScheduler& scheduler_); | 61 | const VKDevice& device_, VKScheduler& scheduler_); |
| 62 | 62 | ||
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 970979fa1..7979df3a8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -15,7 +15,6 @@ | |||
| 15 | #include "video_core/renderer_vulkan/vk_device.h" | 15 | #include "video_core/renderer_vulkan/vk_device.h" |
| 16 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 16 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 17 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 17 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 18 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 19 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 18 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 20 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 19 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 21 | #include "video_core/renderer_vulkan/wrapper.h" | 20 | #include "video_core/renderer_vulkan/wrapper.h" |
| @@ -69,23 +68,45 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { | |||
| 69 | }; | 68 | }; |
| 70 | } | 69 | } |
| 71 | 70 | ||
| 71 | VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { | ||
| 72 | switch (msaa_mode) { | ||
| 73 | case Tegra::Texture::MsaaMode::Msaa1x1: | ||
| 74 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 75 | case Tegra::Texture::MsaaMode::Msaa2x1: | ||
| 76 | case Tegra::Texture::MsaaMode::Msaa2x1_D3D: | ||
| 77 | return VK_SAMPLE_COUNT_2_BIT; | ||
| 78 | case Tegra::Texture::MsaaMode::Msaa2x2: | ||
| 79 | case Tegra::Texture::MsaaMode::Msaa2x2_VC4: | ||
| 80 | case Tegra::Texture::MsaaMode::Msaa2x2_VC12: | ||
| 81 | return VK_SAMPLE_COUNT_4_BIT; | ||
| 82 | case Tegra::Texture::MsaaMode::Msaa4x2: | ||
| 83 | case Tegra::Texture::MsaaMode::Msaa4x2_D3D: | ||
| 84 | case Tegra::Texture::MsaaMode::Msaa4x2_VC8: | ||
| 85 | case Tegra::Texture::MsaaMode::Msaa4x2_VC24: | ||
| 86 | return VK_SAMPLE_COUNT_8_BIT; | ||
| 87 | case Tegra::Texture::MsaaMode::Msaa4x4: | ||
| 88 | return VK_SAMPLE_COUNT_16_BIT; | ||
| 89 | default: | ||
| 90 | UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); | ||
| 91 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 72 | } // Anonymous namespace | 95 | } // Anonymous namespace |
| 73 | 96 | ||
| 74 | VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, | 97 | VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, |
| 75 | VKDescriptorPool& descriptor_pool_, | 98 | VKDescriptorPool& descriptor_pool_, |
| 76 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 99 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 77 | VKRenderPassCache& renderpass_cache_, | 100 | const GraphicsPipelineCacheKey& key, |
| 78 | const GraphicsPipelineCacheKey& key_, | 101 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 79 | vk::Span<VkDescriptorSetLayoutBinding> bindings_, | 102 | const SPIRVProgram& program, u32 num_color_buffers) |
| 80 | const SPIRVProgram& program_) | 103 | : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, |
| 81 | : device{device_}, scheduler{scheduler_}, cache_key{key_}, hash{cache_key.Hash()}, | 104 | descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, |
| 82 | descriptor_set_layout{CreateDescriptorSetLayout(bindings_)}, | ||
| 83 | descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, | 105 | descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, |
| 84 | update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, | 106 | update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, |
| 85 | descriptor_template{CreateDescriptorUpdateTemplate(program_)}, modules{CreateShaderModules( | 107 | descriptor_template{CreateDescriptorUpdateTemplate(program)}, |
| 86 | program_)}, | 108 | modules(CreateShaderModules(program)), |
| 87 | renderpass{renderpass_cache_.GetRenderPass(cache_key.renderpass_params)}, | 109 | pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} |
| 88 | pipeline{CreatePipeline(cache_key.renderpass_params, program_)} {} | ||
| 89 | 110 | ||
| 90 | VKGraphicsPipeline::~VKGraphicsPipeline() = default; | 111 | VKGraphicsPipeline::~VKGraphicsPipeline() = default; |
| 91 | 112 | ||
| @@ -179,8 +200,9 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( | |||
| 179 | return shader_modules; | 200 | return shader_modules; |
| 180 | } | 201 | } |
| 181 | 202 | ||
| 182 | vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, | 203 | vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, |
| 183 | const SPIRVProgram& program) const { | 204 | VkRenderPass renderpass, |
| 205 | u32 num_color_buffers) const { | ||
| 184 | const auto& state = cache_key.fixed_state; | 206 | const auto& state = cache_key.fixed_state; |
| 185 | const auto& viewport_swizzles = state.viewport_swizzles; | 207 | const auto& viewport_swizzles = state.viewport_swizzles; |
| 186 | 208 | ||
| @@ -290,8 +312,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 290 | }; | 312 | }; |
| 291 | 313 | ||
| 292 | std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; | 314 | std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; |
| 293 | std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(), | 315 | std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); |
| 294 | UnpackViewportSwizzle); | ||
| 295 | VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ | 316 | VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ |
| 296 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, | 317 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, |
| 297 | .pNext = nullptr, | 318 | .pNext = nullptr, |
| @@ -326,7 +347,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 326 | .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | 347 | .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, |
| 327 | .pNext = nullptr, | 348 | .pNext = nullptr, |
| 328 | .flags = 0, | 349 | .flags = 0, |
| 329 | .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, | 350 | .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), |
| 330 | .sampleShadingEnable = VK_FALSE, | 351 | .sampleShadingEnable = VK_FALSE, |
| 331 | .minSampleShading = 0.0f, | 352 | .minSampleShading = 0.0f, |
| 332 | .pSampleMask = nullptr, | 353 | .pSampleMask = nullptr, |
| @@ -352,8 +373,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 352 | }; | 373 | }; |
| 353 | 374 | ||
| 354 | std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; | 375 | std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; |
| 355 | const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments); | 376 | for (std::size_t index = 0; index < num_color_buffers; ++index) { |
| 356 | for (std::size_t index = 0; index < num_attachments; ++index) { | ||
| 357 | static constexpr std::array COMPONENT_TABLE{ | 377 | static constexpr std::array COMPONENT_TABLE{ |
| 358 | VK_COLOR_COMPONENT_R_BIT, | 378 | VK_COLOR_COMPONENT_R_BIT, |
| 359 | VK_COLOR_COMPONENT_G_BIT, | 379 | VK_COLOR_COMPONENT_G_BIT, |
| @@ -387,7 +407,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 387 | .flags = 0, | 407 | .flags = 0, |
| 388 | .logicOpEnable = VK_FALSE, | 408 | .logicOpEnable = VK_FALSE, |
| 389 | .logicOp = VK_LOGIC_OP_COPY, | 409 | .logicOp = VK_LOGIC_OP_COPY, |
| 390 | .attachmentCount = static_cast<u32>(num_attachments), | 410 | .attachmentCount = num_color_buffers, |
| 391 | .pAttachments = cb_attachments.data(), | 411 | .pAttachments = cb_attachments.data(), |
| 392 | .blendConstants = {}, | 412 | .blendConstants = {}, |
| 393 | }; | 413 | }; |
| @@ -447,8 +467,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 447 | stage_ci.pNext = &subgroup_size_ci; | 467 | stage_ci.pNext = &subgroup_size_ci; |
| 448 | } | 468 | } |
| 449 | } | 469 | } |
| 450 | 470 | return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ | |
| 451 | const VkGraphicsPipelineCreateInfo ci{ | ||
| 452 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | 471 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, |
| 453 | .pNext = nullptr, | 472 | .pNext = nullptr, |
| 454 | .flags = 0, | 473 | .flags = 0, |
| @@ -468,8 +487,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 468 | .subpass = 0, | 487 | .subpass = 0, |
| 469 | .basePipelineHandle = nullptr, | 488 | .basePipelineHandle = nullptr, |
| 470 | .basePipelineIndex = 0, | 489 | .basePipelineIndex = 0, |
| 471 | }; | 490 | }); |
| 472 | return device.GetLogical().CreateGraphicsPipeline(ci); | ||
| 473 | } | 491 | } |
| 474 | 492 | ||
| 475 | } // namespace Vulkan | 493 | } // namespace Vulkan |
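Editor's note: the Maxwell MsaaMode names handled in ConvertMsaaMode encode a width-by-height coverage grid, and the function returns that grid's total sample count (2x2 maps to 4 samples, 4x2 to 8, 4x4 to 16). A hedged sketch making the arithmetic explicit; SamplesFromGrid is a hypothetical helper, not part of the patch.

    #include <vulkan/vulkan.h>

    // Hypothetical helper: maps a WxH sample grid to its Vulkan sample count,
    // mirroring what ConvertMsaaMode does for the named Maxwell enum values.
    constexpr VkSampleCountFlagBits SamplesFromGrid(int width, int height) {
        switch (width * height) {
        case 1:
            return VK_SAMPLE_COUNT_1_BIT;
        case 2:
            return VK_SAMPLE_COUNT_2_BIT;
        case 4:
            return VK_SAMPLE_COUNT_4_BIT;
        case 8:
            return VK_SAMPLE_COUNT_8_BIT;
        case 16:
            return VK_SAMPLE_COUNT_16_BIT;
        default:
            return VK_SAMPLE_COUNT_1_BIT; // unknown grid, fall back to 1 sample
        }
    }
    static_assert(SamplesFromGrid(2, 2) == VK_SAMPLE_COUNT_4_BIT);  // Msaa2x2
    static_assert(SamplesFromGrid(4, 2) == VK_SAMPLE_COUNT_8_BIT);  // Msaa4x2
    static_assert(SamplesFromGrid(4, 4) == VK_SAMPLE_COUNT_16_BIT); // Msaa4x4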
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 3fb31d55a..214d06b4c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h | |||
| @@ -8,10 +8,10 @@ | |||
| 8 | #include <optional> | 8 | #include <optional> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 12 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 13 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 13 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 14 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 15 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 16 | #include "video_core/renderer_vulkan/wrapper.h" | 16 | #include "video_core/renderer_vulkan/wrapper.h" |
| 17 | 17 | ||
| @@ -20,8 +20,7 @@ namespace Vulkan { | |||
| 20 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 20 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 21 | 21 | ||
| 22 | struct GraphicsPipelineCacheKey { | 22 | struct GraphicsPipelineCacheKey { |
| 23 | RenderPassParams renderpass_params; | 23 | VkRenderPass renderpass; |
| 24 | u32 padding; | ||
| 25 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; | 24 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; |
| 26 | FixedPipelineState fixed_state; | 25 | FixedPipelineState fixed_state; |
| 27 | 26 | ||
| @@ -34,7 +33,7 @@ struct GraphicsPipelineCacheKey { | |||
| 34 | } | 33 | } |
| 35 | 34 | ||
| 36 | std::size_t Size() const noexcept { | 35 | std::size_t Size() const noexcept { |
| 37 | return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); | 36 | return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); |
| 38 | } | 37 | } |
| 39 | }; | 38 | }; |
| 40 | static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); | 39 | static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); |
| @@ -43,7 +42,6 @@ static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); | |||
| 43 | 42 | ||
| 44 | class VKDescriptorPool; | 43 | class VKDescriptorPool; |
| 45 | class VKDevice; | 44 | class VKDevice; |
| 46 | class VKRenderPassCache; | ||
| 47 | class VKScheduler; | 45 | class VKScheduler; |
| 48 | class VKUpdateDescriptorQueue; | 46 | class VKUpdateDescriptorQueue; |
| 49 | 47 | ||
| @@ -52,12 +50,11 @@ using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderSt | |||
| 52 | class VKGraphicsPipeline final { | 50 | class VKGraphicsPipeline final { |
| 53 | public: | 51 | public: |
| 54 | explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, | 52 | explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, |
| 55 | VKDescriptorPool& descriptor_pool_, | 53 | VKDescriptorPool& descriptor_pool, |
| 56 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 54 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 57 | VKRenderPassCache& renderpass_cache_, | 55 | const GraphicsPipelineCacheKey& key, |
| 58 | const GraphicsPipelineCacheKey& key_, | 56 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 59 | vk::Span<VkDescriptorSetLayoutBinding> bindings_, | 57 | const SPIRVProgram& program, u32 num_color_buffers); |
| 60 | const SPIRVProgram& program_); | ||
| 61 | ~VKGraphicsPipeline(); | 58 | ~VKGraphicsPipeline(); |
| 62 | 59 | ||
| 63 | VkDescriptorSet CommitDescriptorSet(); | 60 | VkDescriptorSet CommitDescriptorSet(); |
| @@ -70,10 +67,6 @@ public: | |||
| 70 | return *layout; | 67 | return *layout; |
| 71 | } | 68 | } |
| 72 | 69 | ||
| 73 | VkRenderPass GetRenderPass() const { | ||
| 74 | return renderpass; | ||
| 75 | } | ||
| 76 | |||
| 77 | GraphicsPipelineCacheKey GetCacheKey() const { | 70 | GraphicsPipelineCacheKey GetCacheKey() const { |
| 78 | return cache_key; | 71 | return cache_key; |
| 79 | } | 72 | } |
| @@ -89,8 +82,8 @@ private: | |||
| 89 | 82 | ||
| 90 | std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; | 83 | std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; |
| 91 | 84 | ||
| 92 | vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params, | 85 | vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, |
| 93 | const SPIRVProgram& program) const; | 86 | u32 num_color_buffers) const; |
| 94 | 87 | ||
| 95 | const VKDevice& device; | 88 | const VKDevice& device; |
| 96 | VKScheduler& scheduler; | 89 | VKScheduler& scheduler; |
| @@ -104,7 +97,6 @@ private: | |||
| 104 | vk::DescriptorUpdateTemplateKHR descriptor_template; | 97 | vk::DescriptorUpdateTemplateKHR descriptor_template; |
| 105 | std::vector<vk::ShaderModule> modules; | 98 | std::vector<vk::ShaderModule> modules; |
| 106 | 99 | ||
| 107 | VkRenderPass renderpass; | ||
| 108 | vk::Pipeline pipeline; | 100 | vk::Pipeline pipeline; |
| 109 | }; | 101 | }; |
| 110 | 102 | ||
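Editor's note: the static_assert on std::has_unique_object_representations_v is what makes it legal to hash the key as raw bytes, and with RenderPassParams replaced by a pointer-sized VkRenderPass handle the explicit u32 padding member is no longer needed to keep every byte of the struct meaningful. A sketch of byte-wise hashing consistent with the common/cityhash.h include added to vk_pipeline_cache.cpp; the Hash() body below is an assumption for illustration, since the diff does not show it.

    #include "common/cityhash.h"
    #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"

    namespace Vulkan {

    // Assumed implementation: hashing the raw bytes is only sound because the
    // static_assert above guarantees the key contains no padding bytes.
    std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
        const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
        return static_cast<std::size_t>(hash);
    }

    } // namespace Vulkan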
diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp deleted file mode 100644 index 072d14e3b..000000000 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ /dev/null | |||
| @@ -1,135 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <memory> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_image.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 12 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 13 | |||
| 14 | namespace Vulkan { | ||
| 15 | |||
| 16 | VKImage::VKImage(const VKDevice& device_, VKScheduler& scheduler_, | ||
| 17 | const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_) | ||
| 18 | : device{device_}, scheduler{scheduler_}, format{image_ci_.format}, aspect_mask{aspect_mask_}, | ||
| 19 | image_num_layers{image_ci_.arrayLayers}, image_num_levels{image_ci_.mipLevels} { | ||
| 20 | UNIMPLEMENTED_IF_MSG(image_ci_.queueFamilyIndexCount != 0, | ||
| 21 | "Queue family tracking is not implemented"); | ||
| 22 | |||
| 23 | image = device_.GetLogical().CreateImage(image_ci_); | ||
| 24 | |||
| 25 | const u32 num_ranges = image_num_layers * image_num_levels; | ||
| 26 | barriers.resize(num_ranges); | ||
| 27 | subrange_states.resize(num_ranges, {{}, image_ci_.initialLayout}); | ||
| 28 | } | ||
| 29 | |||
| 30 | VKImage::~VKImage() = default; | ||
| 31 | |||
| 32 | void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | ||
| 33 | VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, | ||
| 34 | VkImageLayout new_layout) { | ||
| 35 | if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) { | ||
| 36 | return; | ||
| 37 | } | ||
| 38 | |||
| 39 | std::size_t cursor = 0; | ||
| 40 | for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) { | ||
| 41 | for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) { | ||
| 42 | const u32 layer = base_layer + layer_it; | ||
| 43 | const u32 level = base_level + level_it; | ||
| 44 | auto& state = GetSubrangeState(layer, level); | ||
| 45 | auto& barrier = barriers[cursor]; | ||
| 46 | barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; | ||
| 47 | barrier.pNext = nullptr; | ||
| 48 | barrier.srcAccessMask = state.access; | ||
| 49 | barrier.dstAccessMask = new_access; | ||
| 50 | barrier.oldLayout = state.layout; | ||
| 51 | barrier.newLayout = new_layout; | ||
| 52 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 53 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 54 | barrier.image = *image; | ||
| 55 | barrier.subresourceRange.aspectMask = aspect_mask; | ||
| 56 | barrier.subresourceRange.baseMipLevel = level; | ||
| 57 | barrier.subresourceRange.levelCount = 1; | ||
| 58 | barrier.subresourceRange.baseArrayLayer = layer; | ||
| 59 | barrier.subresourceRange.layerCount = 1; | ||
| 60 | state.access = new_access; | ||
| 61 | state.layout = new_layout; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 66 | |||
| 67 | scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) { | ||
| 68 | // TODO(Rodrigo): Implement a way to use the latest stage across subresources. | ||
| 69 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 70 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {}, | ||
| 71 | vk::Span(barriers.data(), cursor)); | ||
| 72 | }); | ||
| 73 | } | ||
| 74 | |||
| 75 | bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | ||
| 76 | VkAccessFlags new_access, VkImageLayout new_layout) noexcept { | ||
| 77 | const bool is_full_range = base_layer == 0 && num_layers == image_num_layers && | ||
| 78 | base_level == 0 && num_levels == image_num_levels; | ||
| 79 | if (!is_full_range) { | ||
| 80 | state_diverged = true; | ||
| 81 | } | ||
| 82 | |||
| 83 | if (!state_diverged) { | ||
| 84 | auto& state = GetSubrangeState(0, 0); | ||
| 85 | if (state.access != new_access || state.layout != new_layout) { | ||
| 86 | return true; | ||
| 87 | } | ||
| 88 | } | ||
| 89 | |||
| 90 | for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) { | ||
| 91 | for (u32 level_it = 0; level_it < num_levels; ++level_it) { | ||
| 92 | const u32 layer = base_layer + layer_it; | ||
| 93 | const u32 level = base_level + level_it; | ||
| 94 | auto& state = GetSubrangeState(layer, level); | ||
| 95 | if (state.access != new_access || state.layout != new_layout) { | ||
| 96 | return true; | ||
| 97 | } | ||
| 98 | } | ||
| 99 | } | ||
| 100 | return false; | ||
| 101 | } | ||
| 102 | |||
| 103 | void VKImage::CreatePresentView() { | ||
| 104 | // Image type has to be 2D to be presented. | ||
| 105 | present_view = device.GetLogical().CreateImageView({ | ||
| 106 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | ||
| 107 | .pNext = nullptr, | ||
| 108 | .flags = 0, | ||
| 109 | .image = *image, | ||
| 110 | .viewType = VK_IMAGE_VIEW_TYPE_2D, | ||
| 111 | .format = format, | ||
| 112 | .components = | ||
| 113 | { | ||
| 114 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 115 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 116 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 117 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 118 | }, | ||
| 119 | .subresourceRange = | ||
| 120 | { | ||
| 121 | .aspectMask = aspect_mask, | ||
| 122 | .baseMipLevel = 0, | ||
| 123 | .levelCount = 1, | ||
| 124 | .baseArrayLayer = 0, | ||
| 125 | .layerCount = 1, | ||
| 126 | }, | ||
| 127 | }); | ||
| 128 | } | ||
| 129 | |||
| 130 | VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { | ||
| 131 | return subrange_states[static_cast<std::size_t>(layer * image_num_levels) + | ||
| 132 | static_cast<std::size_t>(level)]; | ||
| 133 | } | ||
| 134 | |||
| 135 | } // namespace Vulkan | ||
| \ No newline at end of file | ||
diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h deleted file mode 100644 index 287ab90ca..000000000 --- a/src/video_core/renderer_vulkan/vk_image.h +++ /dev/null | |||
| @@ -1,84 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 12 | |||
| 13 | namespace Vulkan { | ||
| 14 | |||
| 15 | class VKDevice; | ||
| 16 | class VKScheduler; | ||
| 17 | |||
| 18 | class VKImage { | ||
| 19 | public: | ||
| 20 | explicit VKImage(const VKDevice& device_, VKScheduler& scheduler_, | ||
| 21 | const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_); | ||
| 22 | ~VKImage(); | ||
| 23 | |||
| 24 | /// Records in the passed command buffer an image transition and updates the state of the image. | ||
| 25 | void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | ||
| 26 | VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, | ||
| 27 | VkImageLayout new_layout); | ||
| 28 | |||
| 29 | /// Returns a view compatible with presentation, the image has to be 2D. | ||
| 30 | VkImageView GetPresentView() { | ||
| 31 | if (!present_view) { | ||
| 32 | CreatePresentView(); | ||
| 33 | } | ||
| 34 | return *present_view; | ||
| 35 | } | ||
| 36 | |||
| 37 | /// Returns the Vulkan image handler. | ||
| 38 | const vk::Image& GetHandle() const { | ||
| 39 | return image; | ||
| 40 | } | ||
| 41 | |||
| 42 | /// Returns the Vulkan format for this image. | ||
| 43 | VkFormat GetFormat() const { | ||
| 44 | return format; | ||
| 45 | } | ||
| 46 | |||
| 47 | /// Returns the Vulkan aspect mask. | ||
| 48 | VkImageAspectFlags GetAspectMask() const { | ||
| 49 | return aspect_mask; | ||
| 50 | } | ||
| 51 | |||
| 52 | private: | ||
| 53 | struct SubrangeState final { | ||
| 54 | VkAccessFlags access = 0; ///< Current access bits. | ||
| 55 | VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout. | ||
| 56 | }; | ||
| 57 | |||
| 58 | bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | ||
| 59 | VkAccessFlags new_access, VkImageLayout new_layout) noexcept; | ||
| 60 | |||
| 61 | /// Creates a presentation view. | ||
| 62 | void CreatePresentView(); | ||
| 63 | |||
| 64 | /// Returns the subrange state for a layer and level. | ||
| 65 | SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept; | ||
| 66 | |||
| 67 | const VKDevice& device; ///< Device handler. | ||
| 68 | VKScheduler& scheduler; ///< Device scheduler. | ||
| 69 | |||
| 70 | const VkFormat format; ///< Vulkan format. | ||
| 71 | const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask. | ||
| 72 | const u32 image_num_layers; ///< Number of layers. | ||
| 73 | const u32 image_num_levels; ///< Number of mipmap levels. | ||
| 74 | |||
| 75 | vk::Image image; ///< Image handle. | ||
| 76 | vk::ImageView present_view; ///< Image view compatible with presentation. | ||
| 77 | |||
| 78 | std::vector<VkImageMemoryBarrier> barriers; ///< Pool of barriers. | ||
| 79 | std::vector<SubrangeState> subrange_states; ///< Current subrange state. | ||
| 80 | |||
| 81 | bool state_diverged = false; ///< True when subresources mismatch in layout. | ||
| 82 | }; | ||
| 83 | |||
| 84 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index be53d450f..56b24b70f 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp | |||
| @@ -216,7 +216,7 @@ VKMemoryCommitImpl::~VKMemoryCommitImpl() { | |||
| 216 | } | 216 | } |
| 217 | 217 | ||
| 218 | MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { | 218 | MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { |
| 219 | return MemoryMap{this, memory.Map(interval.first + offset_, size)}; | 219 | return MemoryMap(this, std::span<u8>(memory.Map(interval.first + offset_, size), size)); |
| 220 | } | 220 | } |
| 221 | 221 | ||
| 222 | void VKMemoryCommitImpl::Unmap() const { | 222 | void VKMemoryCommitImpl::Unmap() const { |
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 39f903ec8..318f8b43e 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <span> | ||
| 8 | #include <utility> | 9 | #include <utility> |
| 9 | #include <vector> | 10 | #include <vector> |
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| @@ -93,8 +94,8 @@ private: | |||
| 93 | /// Holds ownership of a memory map. | 94 | /// Holds ownership of a memory map. |
| 94 | class MemoryMap final { | 95 | class MemoryMap final { |
| 95 | public: | 96 | public: |
| 96 | explicit MemoryMap(const VKMemoryCommitImpl* commit_, u8* address_) | 97 | explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span<u8> span_) |
| 97 | : commit{commit_}, address{address_} {} | 98 | : commit{commit_}, span{span_} {} |
| 98 | 99 | ||
| 99 | ~MemoryMap() { | 100 | ~MemoryMap() { |
| 100 | if (commit) { | 101 | if (commit) { |
| @@ -108,19 +109,24 @@ public: | |||
| 108 | commit = nullptr; | 109 | commit = nullptr; |
| 109 | } | 110 | } |
| 110 | 111 | ||
| 112 | /// Returns a span to the memory map. | ||
| 113 | [[nodiscard]] std::span<u8> Span() const noexcept { | ||
| 114 | return span; | ||
| 115 | } | ||
| 116 | |||
| 111 | /// Returns the address of the memory map. | 117 | /// Returns the address of the memory map. |
| 112 | u8* GetAddress() const { | 118 | [[nodiscard]] u8* Address() const noexcept { |
| 113 | return address; | 119 | return span.data(); |
| 114 | } | 120 | } |
| 115 | 121 | ||
| 116 | /// Returns the address of the memory map. | 122 | /// Returns the address of the memory map. |
| 117 | operator u8*() const { | 123 | [[nodiscard]] operator u8*() const noexcept { |
| 118 | return address; | 124 | return span.data(); |
| 119 | } | 125 | } |
| 120 | 126 | ||
| 121 | private: | 127 | private: |
| 122 | const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. | 128 | const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. |
| 123 | u8* address{}; ///< Address to the mapped memory. | 129 | std::span<u8> span; ///< Span of the mapped memory. |
| 124 | }; | 130 | }; |
| 125 | 131 | ||
| 126 | } // namespace Vulkan | 132 | } // namespace Vulkan |
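Editor's note: switching MemoryMap from a raw u8* to std::span<u8> lets the mapped size travel with the address, so callers can bounds-check writes instead of trusting a bare pointer. A hedged usage sketch follows; it assumes VKMemoryCommit is a smart pointer to VKMemoryCommitImpl (as used elsewhere in this renderer) and that u8/u64 come from common/common_types.h.

    #include <algorithm>
    #include <cassert>
    #include <span>

    #include "common/common_types.h"
    #include "video_core/renderer_vulkan/vk_memory_manager.h"

    namespace Vulkan {

    // Copies `input` into a mapped commit; `commit` is assumed to be a valid
    // VKMemoryCommit obtained from VKMemoryManager.
    void UploadBytes(const VKMemoryCommit& commit, std::span<const u8> input, u64 offset) {
        const MemoryMap map = commit->Map(input.size(), offset);
        const std::span<u8> dst = map.Span();
        assert(dst.size() >= input.size()); // the size now travels with the mapping
        std::copy(input.begin(), input.end(), dst.begin());
    } // ~MemoryMap runs here and unmaps the commit

    } // namespace Vulkan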
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3fb264d03..083796d05 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "common/bit_cast.h" | 10 | #include "common/bit_cast.h" |
| 11 | #include "common/cityhash.h" | ||
| 11 | #include "common/microprofile.h" | 12 | #include "common/microprofile.h" |
| 12 | #include "core/core.h" | 13 | #include "core/core.h" |
| 13 | #include "core/memory.h" | 14 | #include "core/memory.h" |
| @@ -22,7 +23,6 @@ | |||
| 22 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 23 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 23 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 24 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 25 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 25 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 26 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 27 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 27 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 28 | #include "video_core/renderer_vulkan/wrapper.h" | 28 | #include "video_core/renderer_vulkan/wrapper.h" |
| @@ -52,7 +52,9 @@ constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEX | |||
| 52 | constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; | 52 | constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; |
| 53 | 53 | ||
| 54 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ | 54 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ |
| 55 | VideoCommon::Shader::CompileDepth::FullDecompile}; | 55 | .depth = VideoCommon::Shader::CompileDepth::FullDecompile, |
| 56 | .disable_else_derivation = true, | ||
| 57 | }; | ||
| 56 | 58 | ||
| 57 | constexpr std::size_t GetStageFromProgram(std::size_t program) { | 59 | constexpr std::size_t GetStageFromProgram(std::size_t program) { |
| 58 | return program == 0 ? 0 : program - 1; | 60 | return program == 0 ? 0 : program - 1; |
| @@ -149,12 +151,11 @@ VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_ | |||
| 149 | Tegra::Engines::KeplerCompute& kepler_compute_, | 151 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 150 | Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, | 152 | Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, |
| 151 | VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, | 153 | VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, |
| 152 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 154 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| 153 | VKRenderPassCache& renderpass_cache_) | 155 | : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, |
| 154 | : ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, | 156 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, |
| 155 | gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, | 157 | scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ |
| 156 | descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, | 158 | update_descriptor_queue_} {} |
| 157 | renderpass_cache{renderpass_cache_} {} | ||
| 158 | 159 | ||
| 159 | VKPipelineCache::~VKPipelineCache() = default; | 160 | VKPipelineCache::~VKPipelineCache() = default; |
| 160 | 161 | ||
| @@ -199,7 +200,8 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | |||
| 199 | } | 200 | } |
| 200 | 201 | ||
| 201 | VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( | 202 | VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( |
| 202 | const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { | 203 | const GraphicsPipelineCacheKey& key, u32 num_color_buffers, |
| 204 | VideoCommon::Shader::AsyncShaders& async_shaders) { | ||
| 203 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | 205 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); |
| 204 | 206 | ||
| 205 | if (last_graphics_pipeline && last_graphics_key == key) { | 207 | if (last_graphics_pipeline && last_graphics_key == key) { |
| @@ -215,8 +217,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( | |||
| 215 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | 217 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); |
| 216 | const auto [program, bindings] = DecompileShaders(key.fixed_state); | 218 | const auto [program, bindings] = DecompileShaders(key.fixed_state); |
| 217 | async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, | 219 | async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, |
| 218 | update_descriptor_queue, renderpass_cache, bindings, | 220 | update_descriptor_queue, bindings, program, key, |
| 219 | program, key); | 221 | num_color_buffers); |
| 220 | } | 222 | } |
| 221 | last_graphics_pipeline = pair->second.get(); | 223 | last_graphics_pipeline = pair->second.get(); |
| 222 | return last_graphics_pipeline; | 224 | return last_graphics_pipeline; |
| @@ -229,8 +231,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( | |||
| 229 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | 231 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); |
| 230 | const auto [program, bindings] = DecompileShaders(key.fixed_state); | 232 | const auto [program, bindings] = DecompileShaders(key.fixed_state); |
| 231 | entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, | 233 | entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, |
| 232 | update_descriptor_queue, renderpass_cache, key, | 234 | update_descriptor_queue, key, bindings, |
| 233 | bindings, program); | 235 | program, num_color_buffers); |
| 234 | gpu.ShaderNotify().MarkShaderComplete(); | 236 | gpu.ShaderNotify().MarkShaderComplete(); |
| 235 | } | 237 | } |
| 236 | last_graphics_pipeline = entry.get(); | 238 | last_graphics_pipeline = entry.get(); |
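Editor's note: with VKRenderPassCache removed from the pipeline cache, callers now supply a concrete VkRenderPass inside the key and the color attachment count as a separate argument. A hedged call-site sketch; every identifier below is a stand-in for illustration, not code copied from vk_rasterizer.cpp.

    // Illustrative fragment assuming the surrounding rasterizer state exists.
    GraphicsPipelineCacheKey key{};
    key.renderpass = current_renderpass;       // VkRenderPass from the framebuffer setup
    key.shaders = GetShaderAddresses(shaders); // GPU addresses of the bound stages
    // key.fixed_state is refreshed from the Maxwell3D register state elsewhere.

    VKGraphicsPipeline* const pipeline =
        pipeline_cache.GetGraphicsPipeline(key, num_color_buffers, async_shaders);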
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 9e1f8fcbb..fbaa8257c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "video_core/engines/maxwell_3d.h" | 19 | #include "video_core/engines/maxwell_3d.h" |
| 20 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 20 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 21 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 21 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 22 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 23 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 22 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 24 | #include "video_core/renderer_vulkan/wrapper.h" | 23 | #include "video_core/renderer_vulkan/wrapper.h" |
| 25 | #include "video_core/shader/async_shaders.h" | 24 | #include "video_core/shader/async_shaders.h" |
| @@ -119,18 +118,18 @@ private: | |||
| 119 | 118 | ||
| 120 | class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { | 119 | class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { |
| 121 | public: | 120 | public: |
| 122 | explicit VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | 121 | explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, |
| 123 | Tegra::Engines::Maxwell3D& maxwell3d_, | 122 | Tegra::Engines::Maxwell3D& maxwell3d, |
| 124 | Tegra::Engines::KeplerCompute& kepler_compute_, | 123 | Tegra::Engines::KeplerCompute& kepler_compute, |
| 125 | Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, | 124 | Tegra::MemoryManager& gpu_memory, const VKDevice& device, |
| 126 | VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, | 125 | VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, |
| 127 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 126 | VKUpdateDescriptorQueue& update_descriptor_queue); |
| 128 | VKRenderPassCache& renderpass_cache_); | ||
| 129 | ~VKPipelineCache() override; | 127 | ~VKPipelineCache() override; |
| 130 | 128 | ||
| 131 | std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); | 129 | std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); |
| 132 | 130 | ||
| 133 | VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, | 131 | VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, |
| 132 | u32 num_color_buffers, | ||
| 134 | VideoCommon::Shader::AsyncShaders& async_shaders); | 133 | VideoCommon::Shader::AsyncShaders& async_shaders); |
| 135 | 134 | ||
| 136 | VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); | 135 | VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); |
| @@ -153,7 +152,6 @@ private: | |||
| 153 | VKScheduler& scheduler; | 152 | VKScheduler& scheduler; |
| 154 | VKDescriptorPool& descriptor_pool; | 153 | VKDescriptorPool& descriptor_pool; |
| 155 | VKUpdateDescriptorQueue& update_descriptor_queue; | 154 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 156 | VKRenderPassCache& renderpass_cache; | ||
| 157 | 155 | ||
| 158 | std::unique_ptr<Shader> null_shader; | 156 | std::unique_ptr<Shader> null_shader; |
| 159 | std::unique_ptr<Shader> null_kernel; | 157 | std::unique_ptr<Shader> null_kernel; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f93986aab..04c5c859c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "core/settings.h" | 19 | #include "core/settings.h" |
| 20 | #include "video_core/engines/kepler_compute.h" | 20 | #include "video_core/engines/kepler_compute.h" |
| 21 | #include "video_core/engines/maxwell_3d.h" | 21 | #include "video_core/engines/maxwell_3d.h" |
| 22 | #include "video_core/renderer_vulkan/blit_image.h" | ||
| 22 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 23 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 23 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 24 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 24 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 25 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| @@ -30,8 +31,6 @@ | |||
| 30 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 31 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 31 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 32 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 32 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 33 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 33 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 34 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | ||
| 35 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 34 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 36 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 35 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 37 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | 36 | #include "video_core/renderer_vulkan/vk_state_tracker.h" |
| @@ -39,10 +38,13 @@ | |||
| 39 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 38 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 40 | #include "video_core/renderer_vulkan/wrapper.h" | 39 | #include "video_core/renderer_vulkan/wrapper.h" |
| 41 | #include "video_core/shader_cache.h" | 40 | #include "video_core/shader_cache.h" |
| 41 | #include "video_core/texture_cache/texture_cache.h" | ||
| 42 | 42 | ||
| 43 | namespace Vulkan { | 43 | namespace Vulkan { |
| 44 | 44 | ||
| 45 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 45 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 46 | using VideoCommon::ImageViewId; | ||
| 47 | using VideoCommon::ImageViewType; | ||
| 46 | 48 | ||
| 47 | MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); | 49 | MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); |
| 48 | MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); | 50 | MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); |
| @@ -58,9 +60,9 @@ MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192 | |||
| 58 | 60 | ||
| 59 | namespace { | 61 | namespace { |
| 60 | 62 | ||
| 61 | constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); | 63 | constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); |
| 62 | 64 | ||
| 63 | VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { | 65 | VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, size_t index) { |
| 64 | const auto& src = regs.viewport_transform[index]; | 66 | const auto& src = regs.viewport_transform[index]; |
| 65 | const float width = src.scale_x * 2.0f; | 67 | const float width = src.scale_x * 2.0f; |
| 66 | const float height = src.scale_y * 2.0f; | 68 | const float height = src.scale_y * 2.0f; |
| @@ -83,7 +85,7 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si | |||
| 83 | return viewport; | 85 | return viewport; |
| 84 | } | 86 | } |
| 85 | 87 | ||
| 86 | VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { | 88 | VkRect2D GetScissorState(const Maxwell& regs, size_t index) { |
| 87 | const auto& src = regs.scissor_test[index]; | 89 | const auto& src = regs.scissor_test[index]; |
| 88 | VkRect2D scissor; | 90 | VkRect2D scissor; |
| 89 | if (src.enable) { | 91 | if (src.enable) { |
| @@ -103,98 +105,122 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { | |||
| 103 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( | 105 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( |
| 104 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { | 106 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { |
| 105 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; | 107 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; |
| 106 | for (std::size_t i = 0; i < std::size(addresses); ++i) { | 108 | for (size_t i = 0; i < std::size(addresses); ++i) { |
| 107 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; | 109 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; |
| 108 | } | 110 | } |
| 109 | return addresses; | 111 | return addresses; |
| 110 | } | 112 | } |
| 111 | 113 | ||
| 112 | void TransitionImages(const std::vector<ImageView>& views, VkPipelineStageFlags pipeline_stage, | 114 | struct TextureHandle { |
| 113 | VkAccessFlags access) { | 115 | constexpr TextureHandle(u32 data, bool via_header_index) { |
| 114 | for (auto& [view, layout] : views) { | 116 | const Tegra::Texture::TextureHandle handle{data}; |
| 115 | view->Transition(*layout, pipeline_stage, access); | 117 | image = handle.tic_id; |
| 118 | sampler = via_header_index ? image : handle.tsc_id.Value(); | ||
| 116 | } | 119 | } |
| 117 | } | 120 | |
| 121 | u32 image; | ||
| 122 | u32 sampler; | ||
| 123 | }; | ||
| 118 | 124 | ||
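The new TextureHandle helper splits a packed Maxwell handle into its TIC (image descriptor) and TSC (sampler descriptor) indices, and in via-header-index mode reuses the image index as the sampler index. A minimal standalone sketch of that decode, assuming the 20-bit TIC / 12-bit TSC packing of Tegra::Texture::TextureHandle:

    #include <cstdint>

    struct DecodedHandle {
        uint32_t image;   // TIC index: which texture descriptor to use
        uint32_t sampler; // TSC index: which sampler descriptor to use
    };

    constexpr DecodedHandle Decode(uint32_t data, bool via_header_index) {
        const uint32_t tic_id = data & 0xfffffu;       // assumed bits [0, 20)
        const uint32_t tsc_id = (data >> 20) & 0xfffu; // assumed bits [20, 32)
        // In via-header-index mode the sampler slot mirrors the image slot.
        return {tic_id, via_header_index ? tic_id : tsc_id};
    }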
| 119 | template <typename Engine, typename Entry> | 125 | template <typename Engine, typename Entry> |
| 120 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | 126 | TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, |
| 121 | std::size_t stage, std::size_t index = 0) { | 127 | size_t stage, size_t index = 0) { |
| 122 | const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); | 128 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage); |
| 123 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | 129 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { |
| 124 | if (entry.is_separated) { | 130 | if (entry.is_separated) { |
| 125 | const u32 buffer_1 = entry.buffer; | 131 | const u32 buffer_1 = entry.buffer; |
| 126 | const u32 buffer_2 = entry.secondary_buffer; | 132 | const u32 buffer_2 = entry.secondary_buffer; |
| 127 | const u32 offset_1 = entry.offset; | 133 | const u32 offset_1 = entry.offset; |
| 128 | const u32 offset_2 = entry.secondary_offset; | 134 | const u32 offset_2 = entry.secondary_offset; |
| 129 | const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1); | 135 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); |
| 130 | const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2); | 136 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); |
| 131 | return engine.GetTextureInfo(Tegra::Texture::TextureHandle{handle_1 | handle_2}); | 137 | return TextureHandle(handle_1 | handle_2, via_header_index); |
| 132 | } | 138 | } |
| 133 | } | 139 | } |
| 134 | if (entry.is_bindless) { | 140 | if (entry.is_bindless) { |
| 135 | const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); | 141 | const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); |
| 136 | return engine.GetTextureInfo(Tegra::Texture::TextureHandle{tex_handle}); | 142 | return TextureHandle(raw, via_header_index); |
| 137 | } | ||
| 138 | const auto& gpu_profile = engine.AccessGuestDriverProfile(); | ||
| 139 | const u32 entry_offset = static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); | ||
| 140 | const u32 offset = entry.offset + entry_offset; | ||
| 141 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { | ||
| 142 | return engine.GetStageTexture(stage_type, offset); | ||
| 143 | } else { | ||
| 144 | return engine.GetTexture(offset); | ||
| 145 | } | ||
| 146 | } | ||
| 147 | |||
| 148 | /// @brief Determine if an attachment to be updated has to preserve contents | ||
| 149 | /// @param is_clear True when a clear is being executed | ||
| 150 | /// @param regs 3D registers | ||
| 151 | /// @return True when the contents have to be preserved | ||
| 152 | bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) { | ||
| 153 | if (!is_clear) { | ||
| 154 | return true; | ||
| 155 | } | ||
| 156 | // First we have to make sure all clear masks are enabled. | ||
| 157 | if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B || | ||
| 158 | !regs.clear_buffers.A) { | ||
| 159 | return true; | ||
| 160 | } | ||
| 161 | // If scissors are disabled, the whole screen is cleared | ||
| 162 | if (!regs.clear_flags.scissor) { | ||
| 163 | return false; | ||
| 164 | } | 143 | } |
| 165 | // Then we have to confirm scissor testing clears the whole image | 144 | const u32 buffer = engine.GetBoundBuffer(); |
| 166 | const std::size_t index = regs.clear_buffers.RT; | 145 | const u64 offset = (entry.offset + index) * sizeof(u32); |
| 167 | const auto& scissor = regs.scissor_test[0]; | 146 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); |
| 168 | return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width || | ||
| 169 | scissor.max_y < regs.rt[index].height; | ||
| 170 | } | 147 | } |
| 171 | 148 | ||
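GetTextureInfo now resolves a handle through one of three paths: separated samplers reassemble the handle from two const-buffer words, bindless entries read a single word, and regular entries index the engine's bound buffer at word granularity. The two pieces of arithmetic involved, as a hedged sketch:

    #include <cstdint>

    // Separated samplers store the TIC and TSC halves in different const
    // buffer slots with the other field zeroed, so OR reassembles the handle.
    constexpr uint32_t CombineSeparated(uint32_t tic_word, uint32_t tsc_word) {
        return tic_word | tsc_word;
    }

    // Bound-buffer entries form an array of 32-bit handles; the byte offset
    // is (base word offset + array index) scaled by the word size.
    constexpr uint64_t HandleByteOffset(uint32_t entry_offset, uint32_t index) {
        return (uint64_t{entry_offset} + index) * sizeof(uint32_t);
    }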
| 172 | /// @brief Determine if an attachment to be updated has to preserve contents | 149 | template <size_t N> |
| 173 | /// @param is_clear True when a clear is being executed | ||
| 174 | /// @param regs 3D registers | ||
| 175 | /// @return True when the contents have to be preserved | ||
| 176 | bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) { | ||
| 177 | // If we are not clearing, the contents have to be preserved | ||
| 178 | if (!is_clear) { | ||
| 179 | return true; | ||
| 180 | } | ||
| 181 | // For depth stencil clears we only have to confirm the scissor test covers the whole image | ||
| 182 | if (!regs.clear_flags.scissor) { | ||
| 183 | return false; | ||
| 184 | } | ||
| 185 | // Make sure the clear covers the whole image | ||
| 186 | const auto& scissor = regs.scissor_test[0]; | ||
| 187 | return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width || | ||
| 188 | scissor.max_y < regs.zeta_height; | ||
| 189 | } | ||
| 190 | |||
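Both removed helpers reduce to one predicate: does scissor test 0 cover the whole target? If any edge lands inside the image, the untouched region forces the attachment to be preserved. The check in isolation, under the same register semantics as the code above:

    #include <cstdint>

    struct ScissorRect {
        uint32_t min_x, min_y, max_x, max_y;
    };

    // True when the clear touches only part of the image, meaning the
    // contents outside the scissor must survive the clear.
    constexpr bool MustPreserve(const ScissorRect& s, uint32_t width, uint32_t height) {
        return s.min_x > 0 || s.min_y > 0 || s.max_x < width || s.max_y < height;
    }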
| 191 | template <std::size_t N> | ||
| 192 | std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { | 150 | std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { |
| 193 | std::array<VkDeviceSize, N> expanded; | 151 | std::array<VkDeviceSize, N> expanded; |
| 194 | std::copy(strides.begin(), strides.end(), expanded.begin()); | 152 | std::copy(strides.begin(), strides.end(), expanded.begin()); |
| 195 | return expanded; | 153 | return expanded; |
| 196 | } | 154 | } |
| 197 | 155 | ||
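ExpandStrides widens the u16 strides stored in the fixed pipeline state to VkDeviceSize, presumably because the extended-dynamic-state bind call (vkCmdBindVertexBuffers2EXT) takes 64-bit strides; std::copy performs the per-element integral promotion. The same idiom, self-contained:

    #include <algorithm>
    #include <array>
    #include <cstdint>

    template <std::size_t N>
    std::array<uint64_t, N> Widen(const std::array<uint16_t, N>& strides) {
        std::array<uint64_t, N> expanded{};
        std::copy(strides.begin(), strides.end(), expanded.begin()); // u16 -> u64
        return expanded;
    }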
| 156 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | ||
| 157 | if (entry.is_buffer) { | ||
| 158 | return ImageViewType::e2D; | ||
| 159 | } | ||
| 160 | switch (entry.type) { | ||
| 161 | case Tegra::Shader::TextureType::Texture1D: | ||
| 162 | return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; | ||
| 163 | case Tegra::Shader::TextureType::Texture2D: | ||
| 164 | return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; | ||
| 165 | case Tegra::Shader::TextureType::Texture3D: | ||
| 166 | return ImageViewType::e3D; | ||
| 167 | case Tegra::Shader::TextureType::TextureCube: | ||
| 168 | return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; | ||
| 169 | } | ||
| 170 | UNREACHABLE(); | ||
| 171 | return ImageViewType::e2D; | ||
| 172 | } | ||
| 173 | |||
| 174 | ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { | ||
| 175 | switch (entry.type) { | ||
| 176 | case Tegra::Shader::ImageType::Texture1D: | ||
| 177 | return ImageViewType::e1D; | ||
| 178 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 179 | return ImageViewType::e1DArray; | ||
| 180 | case Tegra::Shader::ImageType::Texture2D: | ||
| 181 | return ImageViewType::e2D; | ||
| 182 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 183 | return ImageViewType::e2DArray; | ||
| 184 | case Tegra::Shader::ImageType::Texture3D: | ||
| 185 | return ImageViewType::e3D; | ||
| 186 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 187 | return ImageViewType::Buffer; | ||
| 188 | } | ||
| 189 | UNREACHABLE(); | ||
| 190 | return ImageViewType::e2D; | ||
| 191 | } | ||
| 192 | |||
| 193 | void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, | ||
| 194 | VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 195 | ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { | ||
| 196 | for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { | ||
| 197 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 198 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 199 | update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); | ||
| 200 | } | ||
| 201 | for (const auto& entry : entries.samplers) { | ||
| 202 | for (size_t i = 0; i < entry.size; ++i) { | ||
| 203 | const VkSampler sampler = *sampler_ptr++; | ||
| 204 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 205 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 206 | const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); | ||
| 207 | update_descriptor_queue.AddSampledImage(handle, sampler); | ||
| 208 | } | ||
| 209 | } | ||
| 210 | for ([[maybe_unused]] const auto& entry : entries.storage_texels) { | ||
| 211 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 212 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 213 | update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); | ||
| 214 | } | ||
| 215 | for (const auto& entry : entries.images) { | ||
| 216 | // TODO: Mark as modified | ||
| 217 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 218 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 219 | const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); | ||
| 220 | update_descriptor_queue.AddImage(handle); | ||
| 221 | } | ||
| 222 | } | ||
| 223 | |||
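PushImageDescriptors is the consume half of a two-phase pattern: the Setup* functions gather raw image indices per shader entry, FillGraphicsImageViews/FillComputeImageViews resolves them in one texture-cache pass, and the descriptor writes then walk the resolved array with an advancing cursor in the same entry order. A compilable miniature of the contract, with ResolveAll as a hypothetical stand-in for the Fill* calls:

    #include <cstddef>
    #include <cstdint>
    #include <span>
    #include <vector>

    // Hypothetical resolver: turns raw indices into ids in one pass,
    // preserving order (stands in for texture_cache.Fill*ImageViews).
    void ResolveAll(std::span<const uint32_t> indices, std::span<uint32_t> ids);

    void Consume(const std::vector<uint32_t>& gathered) {
        std::vector<uint32_t> ids(gathered.size());
        ResolveAll(gathered, ids);            // bulk resolve
        const uint32_t* cursor = ids.data();  // then consume in gather order
        for (std::size_t i = 0; i < ids.size(); ++i) {
            const uint32_t id = *cursor++;    // one id per pushed index
            (void)id;                         // a descriptor write goes here
        }
    }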
| 198 | } // Anonymous namespace | 224 | } // Anonymous namespace |
| 199 | 225 | ||
| 200 | class BufferBindings final { | 226 | class BufferBindings final { |
| @@ -290,7 +316,7 @@ public: | |||
| 290 | private: | 316 | private: |
| 291 | // Some of these fields are intentionally left uninitialized to avoid initializing them twice. | 317 | // Some of these fields are intentionally left uninitialized to avoid initializing them twice. |
| 292 | struct { | 318 | struct { |
| 293 | std::size_t num_buffers = 0; | 319 | size_t num_buffers = 0; |
| 294 | std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; | 320 | std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; |
| 295 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; | 321 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; |
| 296 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; | 322 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; |
| @@ -303,7 +329,7 @@ private: | |||
| 303 | VkIndexType type; | 329 | VkIndexType type; |
| 304 | } index; | 330 | } index; |
| 305 | 331 | ||
| 306 | template <std::size_t N> | 332 | template <size_t N> |
| 307 | void BindStatic(const VKDevice& device, VKScheduler& scheduler) const { | 333 | void BindStatic(const VKDevice& device, VKScheduler& scheduler) const { |
| 308 | if (device.IsExtExtendedDynamicStateSupported()) { | 334 | if (device.IsExtExtendedDynamicStateSupported()) { |
| 309 | if (index.buffer) { | 335 | if (index.buffer) { |
| @@ -320,7 +346,7 @@ private: | |||
| 320 | } | 346 | } |
| 321 | } | 347 | } |
| 322 | 348 | ||
| 323 | template <std::size_t N, bool is_indexed, bool has_extended_dynamic_state> | 349 | template <size_t N, bool is_indexed, bool has_extended_dynamic_state> |
| 324 | void BindStatic(VKScheduler& scheduler) const { | 350 | void BindStatic(VKScheduler& scheduler) const { |
| 325 | static_assert(N <= Maxwell::NumVertexArrays); | 351 | static_assert(N <= Maxwell::NumVertexArrays); |
| 326 | if constexpr (N == 0) { | 352 | if constexpr (N == 0) { |
| @@ -385,20 +411,23 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 385 | Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_, | 411 | Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_, |
| 386 | const VKDevice& device_, VKMemoryManager& memory_manager_, | 412 | const VKDevice& device_, VKMemoryManager& memory_manager_, |
| 387 | StateTracker& state_tracker_, VKScheduler& scheduler_) | 413 | StateTracker& state_tracker_, VKScheduler& scheduler_) |
| 388 | : RasterizerAccelerated(cpu_memory_), gpu(gpu_), gpu_memory(gpu_memory_), | 414 | : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, |
| 389 | maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_), | 415 | gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, |
| 390 | device(device_), memory_manager(memory_manager_), state_tracker(state_tracker_), | 416 | screen_info{screen_info_}, device{device_}, memory_manager{memory_manager_}, |
| 391 | scheduler(scheduler_), staging_pool(device, memory_manager, scheduler), | 417 | state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), |
| 392 | descriptor_pool(device, scheduler_), update_descriptor_queue(device, scheduler), | 418 | staging_pool(device, memory_manager, scheduler), descriptor_pool(device, scheduler), |
| 393 | renderpass_cache(device), | 419 | update_descriptor_queue(device, scheduler), |
| 420 | blit_image(device, scheduler, state_tracker, descriptor_pool), | ||
| 394 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 421 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
| 395 | quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 422 | quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
| 396 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 423 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
| 397 | texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool), | 424 | texture_cache_runtime{device, scheduler, memory_manager, staging_pool, blit_image}, |
| 425 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | ||
| 398 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, | 426 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, |
| 399 | descriptor_pool, update_descriptor_queue, renderpass_cache), | 427 | descriptor_pool, update_descriptor_queue), |
| 400 | buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, staging_pool), | 428 | buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, stream_buffer, |
| 401 | sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler), | 429 | staging_pool), |
| 430 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, | ||
| 402 | fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, | 431 | fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, |
| 403 | scheduler), | 432 | scheduler), |
| 404 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { | 433 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { |
| @@ -427,9 +456,10 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 427 | const DrawParameters draw_params = | 456 | const DrawParameters draw_params = |
| 428 | SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); | 457 | SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); |
| 429 | 458 | ||
| 430 | update_descriptor_queue.Acquire(); | 459 | auto lock = texture_cache.AcquireLock(); |
| 431 | sampled_views.clear(); | 460 | texture_cache.SynchronizeGraphicsDescriptors(); |
| 432 | image_views.clear(); | 461 | |
| 462 | texture_cache.UpdateRenderTargets(false); | ||
| 433 | 463 | ||
| 434 | const auto shaders = pipeline_cache.GetShaders(); | 464 | const auto shaders = pipeline_cache.GetShaders(); |
| 435 | key.shaders = GetShaderAddresses(shaders); | 465 | key.shaders = GetShaderAddresses(shaders); |
| @@ -437,30 +467,24 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 437 | 467 | ||
| 438 | buffer_cache.Unmap(); | 468 | buffer_cache.Unmap(); |
| 439 | 469 | ||
| 440 | const Texceptions texceptions = UpdateAttachments(false); | 470 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); |
| 441 | SetupImageTransitions(texceptions, color_attachments, zeta_attachment); | 471 | key.renderpass = framebuffer->RenderPass(); |
| 442 | |||
| 443 | key.renderpass_params = GetRenderPassParams(texceptions); | ||
| 444 | key.padding = 0; | ||
| 445 | 472 | ||
| 446 | auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); | 473 | auto* const pipeline = |
| 474 | pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders); | ||
| 447 | if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { | 475 | if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { |
| 448 | // Async graphics pipeline was not ready. | 476 | // Async graphics pipeline was not ready. |
| 449 | return; | 477 | return; |
| 450 | } | 478 | } |
| 451 | 479 | ||
| 452 | scheduler.BindGraphicsPipeline(pipeline->GetHandle()); | ||
| 453 | |||
| 454 | const auto renderpass = pipeline->GetRenderPass(); | ||
| 455 | const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); | ||
| 456 | scheduler.RequestRenderpass(renderpass, framebuffer, render_area); | ||
| 457 | |||
| 458 | UpdateDynamicStates(); | ||
| 459 | |||
| 460 | buffer_bindings.Bind(device, scheduler); | 480 | buffer_bindings.Bind(device, scheduler); |
| 461 | 481 | ||
| 462 | BeginTransformFeedback(); | 482 | BeginTransformFeedback(); |
| 463 | 483 | ||
| 484 | scheduler.RequestRenderpass(framebuffer); | ||
| 485 | scheduler.BindGraphicsPipeline(pipeline->GetHandle()); | ||
| 486 | UpdateDynamicStates(); | ||
| 487 | |||
| 464 | const auto pipeline_layout = pipeline->GetLayout(); | 488 | const auto pipeline_layout = pipeline->GetLayout(); |
| 465 | const auto descriptor_set = pipeline->CommitDescriptorSet(); | 489 | const auto descriptor_set = pipeline->CommitDescriptorSet(); |
| 466 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { | 490 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { |
| @@ -481,9 +505,6 @@ void RasterizerVulkan::Clear() { | |||
| 481 | return; | 505 | return; |
| 482 | } | 506 | } |
| 483 | 507 | ||
| 484 | sampled_views.clear(); | ||
| 485 | image_views.clear(); | ||
| 486 | |||
| 487 | query_cache.UpdateCounters(); | 508 | query_cache.UpdateCounters(); |
| 488 | 509 | ||
| 489 | const auto& regs = maxwell3d.regs; | 510 | const auto& regs = maxwell3d.regs; |
| @@ -495,20 +516,24 @@ void RasterizerVulkan::Clear() { | |||
| 495 | return; | 516 | return; |
| 496 | } | 517 | } |
| 497 | 518 | ||
| 498 | [[maybe_unused]] const auto texceptions = UpdateAttachments(true); | 519 | auto lock = texture_cache.AcquireLock(); |
| 499 | DEBUG_ASSERT(texceptions.none()); | 520 | texture_cache.UpdateRenderTargets(true); |
| 500 | SetupImageTransitions(0, color_attachments, zeta_attachment); | 521 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); |
| 522 | const VkExtent2D render_area = framebuffer->RenderArea(); | ||
| 523 | scheduler.RequestRenderpass(framebuffer); | ||
| 501 | 524 | ||
| 502 | const VkRenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); | 525 | VkClearRect clear_rect{ |
| 503 | const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); | 526 | .rect = GetScissorState(regs, 0), |
| 504 | scheduler.RequestRenderpass(renderpass, framebuffer, render_area); | 527 | .baseArrayLayer = regs.clear_buffers.layer, |
| 505 | 528 | .layerCount = 1, | |
| 506 | VkClearRect clear_rect; | 529 | }; |
| 507 | clear_rect.baseArrayLayer = regs.clear_buffers.layer; | 530 | if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) { |
| 508 | clear_rect.layerCount = 1; | 531 | return; |
| 509 | clear_rect.rect = GetScissorState(regs, 0); | 532 | } |
| 510 | clear_rect.rect.extent.width = std::min(clear_rect.rect.extent.width, render_area.width); | 533 | clear_rect.rect.extent = VkExtent2D{ |
| 511 | clear_rect.rect.extent.height = std::min(clear_rect.rect.extent.height, render_area.height); | 534 | .width = std::min(clear_rect.rect.extent.width, render_area.width), |
| 535 | .height = std::min(clear_rect.rect.extent.height, render_area.height), | ||
| 536 | }; | ||
| 512 | 537 | ||
| 513 | if (use_color) { | 538 | if (use_color) { |
| 514 | VkClearValue clear_value; | 539 | VkClearValue clear_value; |
| @@ -549,9 +574,6 @@ void RasterizerVulkan::Clear() { | |||
| 549 | 574 | ||
| 550 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | 575 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { |
| 551 | MICROPROFILE_SCOPE(Vulkan_Compute); | 576 | MICROPROFILE_SCOPE(Vulkan_Compute); |
| 552 | update_descriptor_queue.Acquire(); | ||
| 553 | sampled_views.clear(); | ||
| 554 | image_views.clear(); | ||
| 555 | 577 | ||
| 556 | query_cache.UpdateCounters(); | 578 | query_cache.UpdateCounters(); |
| 557 | 579 | ||
| @@ -570,29 +592,43 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 570 | // Compute dispatches can't be executed inside a renderpass | 592 | // Compute dispatches can't be executed inside a renderpass |
| 571 | scheduler.RequestOutsideRenderPassOperationContext(); | 593 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 572 | 594 | ||
| 573 | buffer_cache.Map(CalculateComputeStreamBufferSize()); | 595 | image_view_indices.clear(); |
| 596 | sampler_handles.clear(); | ||
| 597 | |||
| 598 | auto lock = texture_cache.AcquireLock(); | ||
| 599 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 574 | 600 | ||
| 575 | const auto& entries = pipeline.GetEntries(); | 601 | const auto& entries = pipeline.GetEntries(); |
| 576 | SetupComputeConstBuffers(entries); | ||
| 577 | SetupComputeGlobalBuffers(entries); | ||
| 578 | SetupComputeUniformTexels(entries); | 602 | SetupComputeUniformTexels(entries); |
| 579 | SetupComputeTextures(entries); | 603 | SetupComputeTextures(entries); |
| 580 | SetupComputeStorageTexels(entries); | 604 | SetupComputeStorageTexels(entries); |
| 581 | SetupComputeImages(entries); | 605 | SetupComputeImages(entries); |
| 582 | 606 | ||
| 583 | buffer_cache.Unmap(); | 607 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |
| 608 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 584 | 609 | ||
| 585 | TransitionImages(sampled_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 610 | buffer_cache.Map(CalculateComputeStreamBufferSize()); |
| 586 | VK_ACCESS_SHADER_READ_BIT); | ||
| 587 | TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 588 | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); | ||
| 589 | 611 | ||
| 612 | update_descriptor_queue.Acquire(); | ||
| 613 | |||
| 614 | SetupComputeConstBuffers(entries); | ||
| 615 | SetupComputeGlobalBuffers(entries); | ||
| 616 | |||
| 617 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | ||
| 618 | VkSampler* sampler_ptr = sampler_handles.data(); | ||
| 619 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, | ||
| 620 | sampler_ptr); | ||
| 621 | |||
| 622 | buffer_cache.Unmap(); | ||
| 623 | |||
| 624 | const VkPipeline pipeline_handle = pipeline.GetHandle(); | ||
| 625 | const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); | ||
| 626 | const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); | ||
| 590 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, | 627 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, |
| 591 | grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), | 628 | grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, |
| 592 | layout = pipeline.GetLayout(), | 629 | descriptor_set](vk::CommandBuffer cmdbuf) { |
| 593 | descriptor_set = pipeline.CommitDescriptorSet()](vk::CommandBuffer cmdbuf) { | ||
| 594 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); | 630 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); |
| 595 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, DESCRIPTOR_SET, | 631 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, DESCRIPTOR_SET, |
| 596 | descriptor_set, {}); | 632 | descriptor_set, {}); |
| 597 | cmdbuf.Dispatch(grid_x, grid_y, grid_z); | 633 | cmdbuf.Dispatch(grid_x, grid_y, grid_z); |
| 598 | }); | 634 | }); |
| @@ -613,7 +649,10 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | |||
| 613 | if (addr == 0 || size == 0) { | 649 | if (addr == 0 || size == 0) { |
| 614 | return; | 650 | return; |
| 615 | } | 651 | } |
| 616 | texture_cache.FlushRegion(addr, size); | 652 | { |
| 653 | auto lock = texture_cache.AcquireLock(); | ||
| 654 | texture_cache.DownloadMemory(addr, size); | ||
| 655 | } | ||
| 617 | buffer_cache.FlushRegion(addr, size); | 656 | buffer_cache.FlushRegion(addr, size); |
| 618 | query_cache.FlushRegion(addr, size); | 657 | query_cache.FlushRegion(addr, size); |
| 619 | } | 658 | } |
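The converted texture-cache calls all share one shape: take the cache's lock in a nested block so it is released before the buffer, pipeline, and query caches run, since those manage their own synchronization. The pattern reduced to its skeleton, reusing the codebase's types:

    void FlushRegionPattern(TextureCache& texture_cache, BufferCache& buffer_cache,
                            VAddr addr, u64 size) {
        {
            auto lock = texture_cache.AcquireLock(); // held only for this call
            texture_cache.DownloadMemory(addr, size);
        } // lock dropped before touching the other caches
        buffer_cache.FlushRegion(addr, size);
    }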
| @@ -622,14 +661,18 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { | |||
| 622 | if (!Settings::IsGPULevelHigh()) { | 661 | if (!Settings::IsGPULevelHigh()) { |
| 623 | return buffer_cache.MustFlushRegion(addr, size); | 662 | return buffer_cache.MustFlushRegion(addr, size); |
| 624 | } | 663 | } |
| 625 | return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); | 664 | return texture_cache.IsRegionGpuModified(addr, size) || |
| 665 | buffer_cache.MustFlushRegion(addr, size); | ||
| 626 | } | 666 | } |
| 627 | 667 | ||
| 628 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | 668 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { |
| 629 | if (addr == 0 || size == 0) { | 669 | if (addr == 0 || size == 0) { |
| 630 | return; | 670 | return; |
| 631 | } | 671 | } |
| 632 | texture_cache.InvalidateRegion(addr, size); | 672 | { |
| 673 | auto lock = texture_cache.AcquireLock(); | ||
| 674 | texture_cache.WriteMemory(addr, size); | ||
| 675 | } | ||
| 633 | pipeline_cache.InvalidateRegion(addr, size); | 676 | pipeline_cache.InvalidateRegion(addr, size); |
| 634 | buffer_cache.InvalidateRegion(addr, size); | 677 | buffer_cache.InvalidateRegion(addr, size); |
| 635 | query_cache.InvalidateRegion(addr, size); | 678 | query_cache.InvalidateRegion(addr, size); |
| @@ -639,17 +682,28 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
| 639 | if (addr == 0 || size == 0) { | 682 | if (addr == 0 || size == 0) { |
| 640 | return; | 683 | return; |
| 641 | } | 684 | } |
| 642 | texture_cache.OnCPUWrite(addr, size); | 685 | { |
| 686 | auto lock = texture_cache.AcquireLock(); | ||
| 687 | texture_cache.WriteMemory(addr, size); | ||
| 688 | } | ||
| 643 | pipeline_cache.OnCPUWrite(addr, size); | 689 | pipeline_cache.OnCPUWrite(addr, size); |
| 644 | buffer_cache.OnCPUWrite(addr, size); | 690 | buffer_cache.OnCPUWrite(addr, size); |
| 645 | } | 691 | } |
| 646 | 692 | ||
| 647 | void RasterizerVulkan::SyncGuestHost() { | 693 | void RasterizerVulkan::SyncGuestHost() { |
| 648 | texture_cache.SyncGuestHost(); | ||
| 649 | buffer_cache.SyncGuestHost(); | 694 | buffer_cache.SyncGuestHost(); |
| 650 | pipeline_cache.SyncGuestHost(); | 695 | pipeline_cache.SyncGuestHost(); |
| 651 | } | 696 | } |
| 652 | 697 | ||
| 698 | void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | ||
| 699 | { | ||
| 700 | auto lock = texture_cache.AcquireLock(); | ||
| 701 | texture_cache.UnmapMemory(addr, size); | ||
| 702 | } | ||
| 703 | buffer_cache.OnCPUWrite(addr, size); | ||
| 704 | pipeline_cache.OnCPUWrite(addr, size); | ||
| 705 | } | ||
| 706 | |||
| 653 | void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { | 707 | void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { |
| 654 | if (!gpu.IsAsync()) { | 708 | if (!gpu.IsAsync()) { |
| 655 | gpu_memory.Write<u32>(addr, value); | 709 | gpu_memory.Write<u32>(addr, value); |
| @@ -700,6 +754,14 @@ void RasterizerVulkan::WaitForIdle() { | |||
| 700 | }); | 754 | }); |
| 701 | } | 755 | } |
| 702 | 756 | ||
| 757 | void RasterizerVulkan::FragmentBarrier() { | ||
| 758 | // We already put barriers when a render pass finishes | ||
| 759 | } | ||
| 760 | |||
| 761 | void RasterizerVulkan::TiledCacheBarrier() { | ||
| 762 | // TODO: Implementing tiled barriers requires rewriting a good chunk of the Vulkan backend | ||
| 763 | } | ||
| 764 | |||
| 703 | void RasterizerVulkan::FlushCommands() { | 765 | void RasterizerVulkan::FlushCommands() { |
| 704 | if (draw_counter > 0) { | 766 | if (draw_counter > 0) { |
| 705 | draw_counter = 0; | 767 | draw_counter = 0; |
| @@ -710,14 +772,20 @@ void RasterizerVulkan::FlushCommands() { | |||
| 710 | void RasterizerVulkan::TickFrame() { | 772 | void RasterizerVulkan::TickFrame() { |
| 711 | draw_counter = 0; | 773 | draw_counter = 0; |
| 712 | update_descriptor_queue.TickFrame(); | 774 | update_descriptor_queue.TickFrame(); |
| 775 | fence_manager.TickFrame(); | ||
| 713 | buffer_cache.TickFrame(); | 776 | buffer_cache.TickFrame(); |
| 714 | staging_pool.TickFrame(); | 777 | staging_pool.TickFrame(); |
| 778 | { | ||
| 779 | auto lock = texture_cache.AcquireLock(); | ||
| 780 | texture_cache.TickFrame(); | ||
| 781 | } | ||
| 715 | } | 782 | } |
| 716 | 783 | ||
| 717 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 784 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 718 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 785 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 719 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 786 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 720 | texture_cache.DoFermiCopy(src, dst, copy_config); | 787 | auto lock = texture_cache.AcquireLock(); |
| 788 | texture_cache.BlitImage(dst, src, copy_config); | ||
| 721 | return true; | 789 | return true; |
| 722 | } | 790 | } |
| 723 | 791 | ||
| @@ -727,20 +795,16 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 727 | return false; | 795 | return false; |
| 728 | } | 796 | } |
| 729 | 797 | ||
| 730 | const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; | 798 | auto lock = texture_cache.AcquireLock(); |
| 731 | if (!surface) { | 799 | ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); |
| 800 | if (!image_view) { | ||
| 732 | return false; | 801 | return false; |
| 733 | } | 802 | } |
| 734 | 803 | ||
| 735 | // Verify that the cached surface is the same size and format as the requested framebuffer | 804 | screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); |
| 736 | const auto& params{surface->GetSurfaceParams()}; | 805 | screen_info.width = image_view->size.width; |
| 737 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); | 806 | screen_info.height = image_view->size.height; |
| 738 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); | 807 | screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); |
| 739 | |||
| 740 | screen_info.image = &surface->GetImage(); | ||
| 741 | screen_info.width = params.width; | ||
| 742 | screen_info.height = params.height; | ||
| 743 | screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion; | ||
| 744 | return true; | 808 | return true; |
| 745 | } | 809 | } |
| 746 | 810 | ||
| @@ -765,103 +829,6 @@ void RasterizerVulkan::FlushWork() { | |||
| 765 | draw_counter = 0; | 829 | draw_counter = 0; |
| 766 | } | 830 | } |
| 767 | 831 | ||
| 768 | RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) { | ||
| 769 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 770 | |||
| 771 | const auto& regs = maxwell3d.regs; | ||
| 772 | auto& dirty = maxwell3d.dirty.flags; | ||
| 773 | const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; | ||
| 774 | dirty[VideoCommon::Dirty::RenderTargets] = false; | ||
| 775 | |||
| 776 | texture_cache.GuardRenderTargets(true); | ||
| 777 | |||
| 778 | Texceptions texceptions; | ||
| 779 | for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | ||
| 780 | if (update_rendertargets) { | ||
| 781 | const bool preserve_contents = HasToPreserveColorContents(is_clear, regs); | ||
| 782 | color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents); | ||
| 783 | } | ||
| 784 | if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { | ||
| 785 | texceptions[rt] = true; | ||
| 786 | } | ||
| 787 | } | ||
| 788 | |||
| 789 | if (update_rendertargets) { | ||
| 790 | const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs); | ||
| 791 | zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents); | ||
| 792 | } | ||
| 793 | if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { | ||
| 794 | texceptions[ZETA_TEXCEPTION_INDEX] = true; | ||
| 795 | } | ||
| 796 | |||
| 797 | texture_cache.GuardRenderTargets(false); | ||
| 798 | |||
| 799 | return texceptions; | ||
| 800 | } | ||
| 801 | |||
| 802 | bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) { | ||
| 803 | bool overlap = false; | ||
| 804 | for (auto& [view, layout] : sampled_views) { | ||
| 805 | if (!attachment.IsSameSurface(*view)) { | ||
| 806 | continue; | ||
| 807 | } | ||
| 808 | overlap = true; | ||
| 809 | *layout = VK_IMAGE_LAYOUT_GENERAL; | ||
| 810 | } | ||
| 811 | return overlap; | ||
| 812 | } | ||
| 813 | |||
| 814 | std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( | ||
| 815 | VkRenderPass renderpass) { | ||
| 816 | FramebufferCacheKey key{ | ||
| 817 | .renderpass = renderpass, | ||
| 818 | .width = std::numeric_limits<u32>::max(), | ||
| 819 | .height = std::numeric_limits<u32>::max(), | ||
| 820 | .layers = std::numeric_limits<u32>::max(), | ||
| 821 | .views = {}, | ||
| 822 | }; | ||
| 823 | |||
| 824 | const auto try_push = [&key](const View& view) { | ||
| 825 | if (!view) { | ||
| 826 | return false; | ||
| 827 | } | ||
| 828 | key.views.push_back(view->GetAttachment()); | ||
| 829 | key.width = std::min(key.width, view->GetWidth()); | ||
| 830 | key.height = std::min(key.height, view->GetHeight()); | ||
| 831 | key.layers = std::min(key.layers, view->GetNumLayers()); | ||
| 832 | return true; | ||
| 833 | }; | ||
| 834 | |||
| 835 | const auto& regs = maxwell3d.regs; | ||
| 836 | const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); | ||
| 837 | for (std::size_t index = 0; index < num_attachments; ++index) { | ||
| 838 | if (try_push(color_attachments[index])) { | ||
| 839 | texture_cache.MarkColorBufferInUse(index); | ||
| 840 | } | ||
| 841 | } | ||
| 842 | if (try_push(zeta_attachment)) { | ||
| 843 | texture_cache.MarkDepthBufferInUse(); | ||
| 844 | } | ||
| 845 | |||
| 846 | const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); | ||
| 847 | auto& framebuffer = fbentry->second; | ||
| 848 | if (is_cache_miss) { | ||
| 849 | framebuffer = device.GetLogical().CreateFramebuffer({ | ||
| 850 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, | ||
| 851 | .pNext = nullptr, | ||
| 852 | .flags = 0, | ||
| 853 | .renderPass = key.renderpass, | ||
| 854 | .attachmentCount = static_cast<u32>(key.views.size()), | ||
| 855 | .pAttachments = key.views.data(), | ||
| 856 | .width = key.width, | ||
| 857 | .height = key.height, | ||
| 858 | .layers = key.layers, | ||
| 859 | }); | ||
| 860 | } | ||
| 861 | |||
| 862 | return {*framebuffer, VkExtent2D{key.width, key.height}}; | ||
| 863 | } | ||
| 864 | |||
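The removed framebuffer cache keyed on the render pass plus the minimum width, height, and layer count across all attachments (so the framebuffer fits every view), and relied on map::try_emplace, whose returned bool distinguishes a freshly default-constructed slot from a hit; the Vulkan object is only built on a miss. The idiom on its own, with BuildExpensive as a stand-in for the CreateFramebuffer call:

    #include <string>
    #include <unordered_map>

    std::string BuildExpensive(int key); // stand-in for the costly construction

    const std::string& GetOrCreate(std::unordered_map<int, std::string>& cache,
                                   int key) {
        const auto [entry, is_cache_miss] = cache.try_emplace(key);
        if (is_cache_miss) {
            entry->second = BuildExpensive(key); // fill the fresh slot once
        }
        return entry->second;
    }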
| 865 | RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, | 832 | RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, |
| 866 | BufferBindings& buffer_bindings, | 833 | BufferBindings& buffer_bindings, |
| 867 | bool is_indexed, | 834 | bool is_indexed, |
| @@ -885,50 +852,37 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt | |||
| 885 | 852 | ||
| 886 | void RasterizerVulkan::SetupShaderDescriptors( | 853 | void RasterizerVulkan::SetupShaderDescriptors( |
| 887 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { | 854 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { |
| 888 | texture_cache.GuardSamplers(true); | 855 | image_view_indices.clear(); |
| 889 | 856 | sampler_handles.clear(); | |
| 890 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | 857 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |
| 891 | // Skip VertexA stage | ||
| 892 | Shader* const shader = shaders[stage + 1]; | 858 | Shader* const shader = shaders[stage + 1]; |
| 893 | if (!shader) { | 859 | if (!shader) { |
| 894 | continue; | 860 | continue; |
| 895 | } | 861 | } |
| 896 | const auto& entries = shader->GetEntries(); | 862 | const auto& entries = shader->GetEntries(); |
| 897 | SetupGraphicsConstBuffers(entries, stage); | ||
| 898 | SetupGraphicsGlobalBuffers(entries, stage); | ||
| 899 | SetupGraphicsUniformTexels(entries, stage); | 863 | SetupGraphicsUniformTexels(entries, stage); |
| 900 | SetupGraphicsTextures(entries, stage); | 864 | SetupGraphicsTextures(entries, stage); |
| 901 | SetupGraphicsStorageTexels(entries, stage); | 865 | SetupGraphicsStorageTexels(entries, stage); |
| 902 | SetupGraphicsImages(entries, stage); | 866 | SetupGraphicsImages(entries, stage); |
| 903 | } | 867 | } |
| 904 | texture_cache.GuardSamplers(false); | 868 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |
| 905 | } | 869 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); |
| 906 | 870 | ||
| 907 | void RasterizerVulkan::SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color, | 871 | update_descriptor_queue.Acquire(); |
| 908 | const ZetaAttachment& zeta) { | ||
| 909 | TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT); | ||
| 910 | TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, | ||
| 911 | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); | ||
| 912 | 872 | ||
| 913 | for (std::size_t rt = 0; rt < color.size(); ++rt) { | 873 | ImageViewId* image_view_id_ptr = image_view_ids.data(); |
| 914 | const auto color_attachment = color[rt]; | 874 | VkSampler* sampler_ptr = sampler_handles.data(); |
| 915 | if (color_attachment == nullptr) { | 875 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |
| 876 | // Skip VertexA stage | ||
| 877 | Shader* const shader = shaders[stage + 1]; | ||
| 878 | if (!shader) { | ||
| 916 | continue; | 879 | continue; |
| 917 | } | 880 | } |
| 918 | const auto image_layout = | 881 | const auto& entries = shader->GetEntries(); |
| 919 | texceptions[rt] ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; | 882 | SetupGraphicsConstBuffers(entries, stage); |
| 920 | color_attachment->Transition(image_layout, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, | 883 | SetupGraphicsGlobalBuffers(entries, stage); |
| 921 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | 884 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, |
| 922 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); | 885 | sampler_ptr); |
| 923 | } | ||
| 924 | |||
| 925 | if (zeta != nullptr) { | ||
| 926 | const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] | ||
| 927 | ? VK_IMAGE_LAYOUT_GENERAL | ||
| 928 | : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; | ||
| 929 | zeta->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, | ||
| 930 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||
| 931 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); | ||
| 932 | } | 886 | } |
| 933 | } | 887 | } |
| 934 | 888 | ||
| @@ -1000,7 +954,7 @@ void RasterizerVulkan::EndTransformFeedback() { | |||
| 1000 | void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { | 954 | void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { |
| 1001 | const auto& regs = maxwell3d.regs; | 955 | const auto& regs = maxwell3d.regs; |
| 1002 | 956 | ||
| 1003 | for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | 957 | for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 1004 | const auto& vertex_array = regs.vertex_array[index]; | 958 | const auto& vertex_array = regs.vertex_array[index]; |
| 1005 | if (!vertex_array.IsEnabled()) { | 959 | if (!vertex_array.IsEnabled()) { |
| 1006 | continue; | 960 | continue; |
| @@ -1009,7 +963,7 @@ void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { | |||
| 1009 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | 963 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; |
| 1010 | 964 | ||
| 1011 | ASSERT(end >= start); | 965 | ASSERT(end >= start); |
| 1012 | const std::size_t size = end - start; | 966 | const size_t size = end - start; |
| 1013 | if (size == 0) { | 967 | if (size == 0) { |
| 1014 | buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); | 968 | buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); |
| 1015 | continue; | 969 | continue; |
| @@ -1070,7 +1024,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar | |||
| 1070 | } | 1024 | } |
| 1071 | } | 1025 | } |
| 1072 | 1026 | ||
| 1073 | void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { | 1027 | void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) { |
| 1074 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | 1028 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); |
| 1075 | const auto& shader_stage = maxwell3d.state.shader_stages[stage]; | 1029 | const auto& shader_stage = maxwell3d.state.shader_stages[stage]; |
| 1076 | for (const auto& entry : entries.const_buffers) { | 1030 | for (const auto& entry : entries.const_buffers) { |
| @@ -1078,7 +1032,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s | |||
| 1078 | } | 1032 | } |
| 1079 | } | 1033 | } |
| 1080 | 1034 | ||
| 1081 | void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { | 1035 | void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) { |
| 1082 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | 1036 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); |
| 1083 | const auto& cbufs{maxwell3d.state.shader_stages[stage]}; | 1037 | const auto& cbufs{maxwell3d.state.shader_stages[stage]}; |
| 1084 | 1038 | ||
| @@ -1088,37 +1042,49 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, | |||
| 1088 | } | 1042 | } |
| 1089 | } | 1043 | } |
| 1090 | 1044 | ||
| 1091 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { | 1045 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { |
| 1092 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1046 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 1047 | const auto& regs = maxwell3d.regs; | ||
| 1048 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 1093 | for (const auto& entry : entries.uniform_texels) { | 1049 | for (const auto& entry : entries.uniform_texels) { |
| 1094 | const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; | 1050 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); |
| 1095 | SetupUniformTexels(image, entry); | 1051 | image_view_indices.push_back(handle.image); |
| 1096 | } | 1052 | } |
| 1097 | } | 1053 | } |
| 1098 | 1054 | ||
| 1099 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { | 1055 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { |
| 1100 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1056 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 1057 | const auto& regs = maxwell3d.regs; | ||
| 1058 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 1101 | for (const auto& entry : entries.samplers) { | 1059 | for (const auto& entry : entries.samplers) { |
| 1102 | for (std::size_t i = 0; i < entry.size; ++i) { | 1060 | for (size_t index = 0; index < entry.size; ++index) { |
| 1103 | const auto texture = GetTextureInfo(maxwell3d, entry, stage, i); | 1061 | const TextureHandle handle = |
| 1104 | SetupTexture(texture, entry); | 1062 | GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); |
| 1063 | image_view_indices.push_back(handle.image); | ||
| 1064 | |||
| 1065 | Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); | ||
| 1066 | sampler_handles.push_back(sampler->Handle()); | ||
| 1105 | } | 1067 | } |
| 1106 | } | 1068 | } |
| 1107 | } | 1069 | } |
| 1108 | 1070 | ||
| 1109 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { | 1071 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { |
| 1110 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1072 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 1073 | const auto& regs = maxwell3d.regs; | ||
| 1074 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 1111 | for (const auto& entry : entries.storage_texels) { | 1075 | for (const auto& entry : entries.storage_texels) { |
| 1112 | const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; | 1076 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); |
| 1113 | SetupStorageTexel(image, entry); | 1077 | image_view_indices.push_back(handle.image); |
| 1114 | } | 1078 | } |
| 1115 | } | 1079 | } |
| 1116 | 1080 | ||
| 1117 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { | 1081 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { |
| 1118 | MICROPROFILE_SCOPE(Vulkan_Images); | 1082 | MICROPROFILE_SCOPE(Vulkan_Images); |
| 1083 | const auto& regs = maxwell3d.regs; | ||
| 1084 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 1119 | for (const auto& entry : entries.images) { | 1085 | for (const auto& entry : entries.images) { |
| 1120 | const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic; | 1086 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); |
| 1121 | SetupImage(tic, entry); | 1087 | image_view_indices.push_back(handle.image); |
| 1122 | } | 1088 | } |
| 1123 | } | 1089 | } |
| 1124 | 1090 | ||
| @@ -1128,11 +1094,12 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { | |||
| 1128 | for (const auto& entry : entries.const_buffers) { | 1094 | for (const auto& entry : entries.const_buffers) { |
| 1129 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | 1095 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; |
| 1130 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | 1096 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); |
| 1131 | Tegra::Engines::ConstBufferInfo buffer; | 1097 | const Tegra::Engines::ConstBufferInfo info{ |
| 1132 | buffer.address = config.Address(); | 1098 | .address = config.Address(), |
| 1133 | buffer.size = config.size; | 1099 | .size = config.size, |
| 1134 | buffer.enabled = mask[entry.GetIndex()]; | 1100 | .enabled = mask[entry.GetIndex()], |
| 1135 | SetupConstBuffer(entry, buffer); | 1101 | }; |
| 1102 | SetupConstBuffer(entry, info); | ||
| 1136 | } | 1103 | } |
| 1137 | } | 1104 | } |
| 1138 | 1105 | ||
| @@ -1147,35 +1114,46 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { | |||
| 1147 | 1114 | ||
| 1148 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { | 1115 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { |
| 1149 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1116 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 1117 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 1150 | for (const auto& entry : entries.uniform_texels) { | 1118 | for (const auto& entry : entries.uniform_texels) { |
| 1151 | const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; | 1119 | const TextureHandle handle = |
| 1152 | SetupUniformTexels(image, entry); | 1120 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); |
| 1121 | image_view_indices.push_back(handle.image); | ||
| 1153 | } | 1122 | } |
| 1154 | } | 1123 | } |
| 1155 | 1124 | ||
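Note how via_header_index is derived differently per engine: the 3D engine exposes it through regs.sampler_index, while compute reads linked_tsc from the launch descriptor. Restated as two overloads lifted from the setup functions in this file:

    // Both predicates appear verbatim in the Setup* functions above and below.
    bool ViaHeaderIndex(const Tegra::Engines::Maxwell3D& maxwell3d) {
        return maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
    }

    bool ViaHeaderIndex(const Tegra::Engines::KeplerCompute& kepler_compute) {
        return kepler_compute.launch_description.linked_tsc;
    }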
| 1156 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | 1125 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { |
| 1157 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1126 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 1127 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 1158 | for (const auto& entry : entries.samplers) { | 1128 | for (const auto& entry : entries.samplers) { |
| 1159 | for (std::size_t i = 0; i < entry.size; ++i) { | 1129 | for (size_t index = 0; index < entry.size; ++index) { |
| 1160 | const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i); | 1130 | const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, |
| 1161 | SetupTexture(texture, entry); | 1131 | COMPUTE_SHADER_INDEX, index); |
| 1132 | image_view_indices.push_back(handle.image); | ||
| 1133 | |||
| 1134 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | ||
| 1135 | sampler_handles.push_back(sampler->Handle()); | ||
| 1162 | } | 1136 | } |
| 1163 | } | 1137 | } |
| 1164 | } | 1138 | } |
| 1165 | 1139 | ||
| 1166 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | 1140 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { |
| 1167 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1141 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 1142 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 1168 | for (const auto& entry : entries.storage_texels) { | 1143 | for (const auto& entry : entries.storage_texels) { |
| 1169 | const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; | 1144 | const TextureHandle handle = |
| 1170 | SetupStorageTexel(image, entry); | 1145 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); |
| 1146 | image_view_indices.push_back(handle.image); | ||
| 1171 | } | 1147 | } |
| 1172 | } | 1148 | } |
| 1173 | 1149 | ||
| 1174 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | 1150 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { |
| 1175 | MICROPROFILE_SCOPE(Vulkan_Images); | 1151 | MICROPROFILE_SCOPE(Vulkan_Images); |
| 1152 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 1176 | for (const auto& entry : entries.images) { | 1153 | for (const auto& entry : entries.images) { |
| 1177 | const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; | 1154 | const TextureHandle handle = |
| 1178 | SetupImage(tic, entry); | 1155 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); |
| 1156 | image_view_indices.push_back(handle.image); | ||
| 1179 | } | 1157 | } |
| 1180 | } | 1158 | } |
| 1181 | 1159 | ||
| @@ -1186,14 +1164,12 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, | |||
| 1186 | update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); | 1164 | update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); |
| 1187 | return; | 1165 | return; |
| 1188 | } | 1166 | } |
| 1189 | |||
| 1190 | // Align the size to avoid bad std140 interactions | 1167 | // Align the size to avoid bad std140 interactions |
| 1191 | const std::size_t size = | 1168 | const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); |
| 1192 | Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); | ||
| 1193 | ASSERT(size <= MaxConstbufferSize); | 1169 | ASSERT(size <= MaxConstbufferSize); |
| 1194 | 1170 | ||
| 1195 | const auto info = | 1171 | const u64 alignment = device.GetUniformBufferAlignment(); |
| 1196 | buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); | 1172 | const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment); |
| 1197 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); | 1173 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); |
| 1198 | } | 1174 | } |
| 1199 | 1175 | ||
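The upload size is rounded up to a multiple of vec4 (16 bytes) before the ASSERT against MaxConstbufferSize, since std140 layouts size uniform blocks in vec4 steps. The arithmetic behind Common::AlignUp for this power-of-two case, as a sketch:

    #include <cstddef>

    constexpr std::size_t AlignToVec4(std::size_t size) {
        constexpr std::size_t vec4 = 4 * sizeof(float); // 16 bytes
        return (size + vec4 - 1) / vec4 * vec4;         // round up
    }
    static_assert(AlignToVec4(20) == 32);
    static_assert(AlignToVec4(32) == 32);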
| @@ -1206,7 +1182,7 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd | |||
| 1206 | // because Vulkan doesn't like empty buffers. | 1182 | // because Vulkan doesn't like empty buffers. |
| 1207 | // Note: Do *not* use DefaultBuffer() here; storage buffers can be written to, breaking | 1183 | // Note: Do *not* use DefaultBuffer() here; storage buffers can be written to, breaking |
| 1208 | // the default buffer. | 1184 | // the default buffer. |
| 1209 | static constexpr std::size_t dummy_size = 4; | 1185 | static constexpr size_t dummy_size = 4; |
| 1210 | const auto info = buffer_cache.GetEmptyBuffer(dummy_size); | 1186 | const auto info = buffer_cache.GetEmptyBuffer(dummy_size); |
| 1211 | update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); | 1187 | update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); |
| 1212 | return; | 1188 | return; |
| @@ -1217,55 +1193,6 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd | |||
| 1217 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); | 1193 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); |
| 1218 | } | 1194 | } |
| 1219 | 1195 | ||
| 1220 | void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, | ||
| 1221 | const UniformTexelEntry& entry) { | ||
| 1222 | const auto view = texture_cache.GetTextureSurface(tic, entry); | ||
| 1223 | ASSERT(view->IsBufferView()); | ||
| 1224 | |||
| 1225 | update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); | ||
| 1226 | } | ||
| 1227 | |||
| 1228 | void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture, | ||
| 1229 | const SamplerEntry& entry) { | ||
| 1230 | auto view = texture_cache.GetTextureSurface(texture.tic, entry); | ||
| 1231 | ASSERT(!view->IsBufferView()); | ||
| 1232 | |||
| 1233 | const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source, | ||
| 1234 | texture.tic.z_source, texture.tic.w_source); | ||
| 1235 | const auto sampler = sampler_cache.GetSampler(texture.tsc); | ||
| 1236 | update_descriptor_queue.AddSampledImage(sampler, image_view); | ||
| 1237 | |||
| 1238 | VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); | ||
| 1239 | *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; | ||
| 1240 | sampled_views.push_back(ImageView{std::move(view), image_layout}); | ||
| 1241 | } | ||
| 1242 | |||
| 1243 | void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic, | ||
| 1244 | const StorageTexelEntry& entry) { | ||
| 1245 | const auto view = texture_cache.GetImageSurface(tic, entry); | ||
| 1246 | ASSERT(view->IsBufferView()); | ||
| 1247 | |||
| 1248 | update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { | ||
| 1252 | auto view = texture_cache.GetImageSurface(tic, entry); | ||
| 1253 | |||
| 1254 | if (entry.is_written) { | ||
| 1255 | view->MarkAsModified(texture_cache.Tick()); | ||
| 1256 | } | ||
| 1257 | |||
| 1258 | UNIMPLEMENTED_IF(tic.IsBuffer()); | ||
| 1259 | |||
| 1260 | const VkImageView image_view = | ||
| 1261 | view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | ||
| 1262 | update_descriptor_queue.AddImage(image_view); | ||
| 1263 | |||
| 1264 | VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); | ||
| 1265 | *image_layout = VK_IMAGE_LAYOUT_GENERAL; | ||
| 1266 | image_views.push_back(ImageView{std::move(view), image_layout}); | ||
| 1267 | } | ||
| 1268 | |||
| 1269 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | 1196 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 1270 | if (!state_tracker.TouchViewports()) { | 1197 | if (!state_tracker.TouchViewports()) { |
| 1271 | return; | 1198 | return; |
| @@ -1457,8 +1384,8 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& | |||
| 1457 | }); | 1384 | }); |
| 1458 | } | 1385 | } |
| 1459 | 1386 | ||
| 1460 | std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { | 1387 | size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { |
| 1461 | std::size_t size = CalculateVertexArraysSize(); | 1388 | size_t size = CalculateVertexArraysSize(); |
| 1462 | if (is_indexed) { | 1389 | if (is_indexed) { |
| 1463 | size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); | 1390 | size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); |
| 1464 | } | 1391 | } |
| @@ -1466,15 +1393,15 @@ std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) | |||
| 1466 | return size; | 1393 | return size; |
| 1467 | } | 1394 | } |
| 1468 | 1395 | ||
| 1469 | std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { | 1396 | size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { |
| 1470 | return Tegra::Engines::KeplerCompute::NumConstBuffers * | 1397 | return Tegra::Engines::KeplerCompute::NumConstBuffers * |
| 1471 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | 1398 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); |
| 1472 | } | 1399 | } |
| 1473 | 1400 | ||
| 1474 | std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { | 1401 | size_t RasterizerVulkan::CalculateVertexArraysSize() const { |
| 1475 | const auto& regs = maxwell3d.regs; | 1402 | const auto& regs = maxwell3d.regs; |
| 1476 | 1403 | ||
| 1477 | std::size_t size = 0; | 1404 | size_t size = 0; |
| 1478 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 1405 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 1479 | // This implementation assumes that all attributes are used in the shader. | 1406 | // This implementation assumes that all attributes are used in the shader. |
| 1480 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; | 1407 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; |
| @@ -1486,12 +1413,12 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { | |||
| 1486 | return size; | 1413 | return size; |
| 1487 | } | 1414 | } |
| 1488 | 1415 | ||
| 1489 | std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { | 1416 | size_t RasterizerVulkan::CalculateIndexBufferSize() const { |
| 1490 | return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * | 1417 | return static_cast<size_t>(maxwell3d.regs.index_array.count) * |
| 1491 | static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); | 1418 | static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); |
| 1492 | } | 1419 | } |
| 1493 | 1420 | ||
| 1494 | std::size_t RasterizerVulkan::CalculateConstBufferSize( | 1421 | size_t RasterizerVulkan::CalculateConstBufferSize( |
| 1495 | const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { | 1422 | const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { |
| 1496 | if (entry.IsIndirect()) { | 1423 | if (entry.IsIndirect()) { |
| 1497 | // Buffer is accessed indirectly, so upload the entire thing | 1424 | // Buffer is accessed indirectly, so upload the entire thing |
| @@ -1502,37 +1429,10 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize( | |||
| 1502 | } | 1429 | } |
| 1503 | } | 1430 | } |
| 1504 | 1431 | ||
| 1505 | RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { | ||
| 1506 | const auto& regs = maxwell3d.regs; | ||
| 1507 | const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); | ||
| 1508 | |||
| 1509 | RenderPassParams params; | ||
| 1510 | params.color_formats = {}; | ||
| 1511 | std::size_t color_texceptions = 0; | ||
| 1512 | |||
| 1513 | std::size_t index = 0; | ||
| 1514 | for (std::size_t rt = 0; rt < num_attachments; ++rt) { | ||
| 1515 | const auto& rendertarget = regs.rt[rt]; | ||
| 1516 | if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) { | ||
| 1517 | continue; | ||
| 1518 | } | ||
| 1519 | params.color_formats[index] = static_cast<u8>(rendertarget.format); | ||
| 1520 | color_texceptions |= (texceptions[rt] ? 1ULL : 0ULL) << index; | ||
| 1521 | ++index; | ||
| 1522 | } | ||
| 1523 | params.num_color_attachments = static_cast<u8>(index); | ||
| 1524 | params.texceptions = static_cast<u8>(color_texceptions); | ||
| 1525 | |||
| 1526 | params.zeta_format = regs.zeta_enable ? static_cast<u8>(regs.zeta.format) : 0; | ||
| 1527 | params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX]; | ||
| 1528 | return params; | ||
| 1529 | } | ||
| 1530 | |||
| 1531 | VkBuffer RasterizerVulkan::DefaultBuffer() { | 1432 | VkBuffer RasterizerVulkan::DefaultBuffer() { |
| 1532 | if (default_buffer) { | 1433 | if (default_buffer) { |
| 1533 | return *default_buffer; | 1434 | return *default_buffer; |
| 1534 | } | 1435 | } |
| 1535 | |||
| 1536 | default_buffer = device.GetLogical().CreateBuffer({ | 1436 | default_buffer = device.GetLogical().CreateBuffer({ |
| 1537 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 1437 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 1538 | .pNext = nullptr, | 1438 | .pNext = nullptr, |
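The SetupConstBuffer hunk above pads every upload to a GLvec4 boundary before asserting it against MaxConstbufferSize. A minimal sketch of that arithmetic, assuming a power-of-two alignment (the AlignUp helper below is a stand-in, not the project's Common::AlignUp):

```cpp
// Sketch only: mirrors the align-then-assert step in SetupConstBuffer.
#include <cassert>
#include <cstddef>

constexpr std::size_t MaxConstbufferSize = 0x10000;

// Assumes alignment is a power of two; 4 * sizeof(float) == 16 qualifies.
constexpr std::size_t AlignUp(std::size_t value, std::size_t alignment) {
    return (value + alignment - 1) & ~(alignment - 1);
}

std::size_t AlignedConstBufferSize(std::size_t raw_size) {
    // Pad to a multiple of a GLvec4 to avoid bad std140 interactions.
    const std::size_t size = AlignUp(raw_size, 4 * sizeof(float));
    assert(size <= MaxConstbufferSize);
    return size;
}
```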
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 30ec58eb4..990f9e031 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -11,11 +11,11 @@ | |||
| 11 | #include <vector> | 11 | #include <vector> |
| 12 | 12 | ||
| 13 | #include <boost/container/static_vector.hpp> | 13 | #include <boost/container/static_vector.hpp> |
| 14 | #include <boost/functional/hash.hpp> | ||
| 15 | 14 | ||
| 16 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 17 | #include "video_core/rasterizer_accelerated.h" | 16 | #include "video_core/rasterizer_accelerated.h" |
| 18 | #include "video_core/rasterizer_interface.h" | 17 | #include "video_core/rasterizer_interface.h" |
| 18 | #include "video_core/renderer_vulkan/blit_image.h" | ||
| 19 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 19 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 20 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 20 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 21 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 21 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| @@ -24,10 +24,9 @@ | |||
| 24 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 24 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 25 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 25 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 26 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 27 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | ||
| 29 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 27 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 30 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 28 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 29 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||
| 31 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 30 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 32 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 31 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 33 | #include "video_core/renderer_vulkan/wrapper.h" | 32 | #include "video_core/renderer_vulkan/wrapper.h" |
| @@ -49,60 +48,9 @@ namespace Vulkan { | |||
| 49 | 48 | ||
| 50 | struct VKScreenInfo; | 49 | struct VKScreenInfo; |
| 51 | 50 | ||
| 52 | using ImageViewsPack = boost::container::static_vector<VkImageView, Maxwell::NumRenderTargets + 1>; | ||
| 53 | |||
| 54 | struct FramebufferCacheKey { | ||
| 55 | VkRenderPass renderpass{}; | ||
| 56 | u32 width = 0; | ||
| 57 | u32 height = 0; | ||
| 58 | u32 layers = 0; | ||
| 59 | ImageViewsPack views; | ||
| 60 | |||
| 61 | std::size_t Hash() const noexcept { | ||
| 62 | std::size_t hash = 0; | ||
| 63 | boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass)); | ||
| 64 | for (const auto& view : views) { | ||
| 65 | boost::hash_combine(hash, static_cast<VkImageView>(view)); | ||
| 66 | } | ||
| 67 | boost::hash_combine(hash, width); | ||
| 68 | boost::hash_combine(hash, height); | ||
| 69 | boost::hash_combine(hash, layers); | ||
| 70 | return hash; | ||
| 71 | } | ||
| 72 | |||
| 73 | bool operator==(const FramebufferCacheKey& rhs) const noexcept { | ||
| 74 | return std::tie(renderpass, views, width, height, layers) == | ||
| 75 | std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers); | ||
| 76 | } | ||
| 77 | |||
| 78 | bool operator!=(const FramebufferCacheKey& rhs) const noexcept { | ||
| 79 | return !operator==(rhs); | ||
| 80 | } | ||
| 81 | }; | ||
| 82 | |||
| 83 | } // namespace Vulkan | ||
| 84 | |||
| 85 | namespace std { | ||
| 86 | |||
| 87 | template <> | ||
| 88 | struct hash<Vulkan::FramebufferCacheKey> { | ||
| 89 | std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept { | ||
| 90 | return k.Hash(); | ||
| 91 | } | ||
| 92 | }; | ||
| 93 | |||
| 94 | } // namespace std | ||
| 95 | |||
| 96 | namespace Vulkan { | ||
| 97 | |||
| 98 | class StateTracker; | 51 | class StateTracker; |
| 99 | class BufferBindings; | 52 | class BufferBindings; |
| 100 | 53 | ||
| 101 | struct ImageView { | ||
| 102 | View view; | ||
| 103 | VkImageLayout* layout = nullptr; | ||
| 104 | }; | ||
| 105 | |||
| 106 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { | 54 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { |
| 107 | public: | 55 | public: |
| 108 | explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 56 | explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| @@ -123,15 +71,18 @@ public: | |||
| 123 | void InvalidateRegion(VAddr addr, u64 size) override; | 71 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 124 | void OnCPUWrite(VAddr addr, u64 size) override; | 72 | void OnCPUWrite(VAddr addr, u64 size) override; |
| 125 | void SyncGuestHost() override; | 73 | void SyncGuestHost() override; |
| 74 | void UnmapMemory(VAddr addr, u64 size) override; | ||
| 126 | void SignalSemaphore(GPUVAddr addr, u32 value) override; | 75 | void SignalSemaphore(GPUVAddr addr, u32 value) override; |
| 127 | void SignalSyncPoint(u32 value) override; | 76 | void SignalSyncPoint(u32 value) override; |
| 128 | void ReleaseFences() override; | 77 | void ReleaseFences() override; |
| 129 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 78 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 130 | void WaitForIdle() override; | 79 | void WaitForIdle() override; |
| 80 | void FragmentBarrier() override; | ||
| 81 | void TiledCacheBarrier() override; | ||
| 131 | void FlushCommands() override; | 82 | void FlushCommands() override; |
| 132 | void TickFrame() override; | 83 | void TickFrame() override; |
| 133 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 84 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 134 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 85 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 135 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 86 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| 136 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 87 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 137 | u32 pixel_stride) override; | 88 | u32 pixel_stride) override; |
| @@ -145,11 +96,17 @@ public: | |||
| 145 | } | 96 | } |
| 146 | 97 | ||
| 147 | /// Maximum supported size that a constbuffer can have in bytes. | 98 | /// Maximum supported size that a constbuffer can have in bytes. |
| 148 | static constexpr std::size_t MaxConstbufferSize = 0x10000; | 99 | static constexpr size_t MaxConstbufferSize = 0x10000; |
| 149 | static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, | 100 | static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, |
| 150 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | 101 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); |
| 151 | 102 | ||
| 152 | private: | 103 | private: |
| 104 | static constexpr size_t MAX_TEXTURES = 192; | ||
| 105 | static constexpr size_t MAX_IMAGES = 48; | ||
| 106 | static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; | ||
| 107 | |||
| 108 | static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); | ||
| 109 | |||
| 153 | struct DrawParameters { | 110 | struct DrawParameters { |
| 154 | void Draw(vk::CommandBuffer cmdbuf) const; | 111 | void Draw(vk::CommandBuffer cmdbuf) const; |
| 155 | 112 | ||
| @@ -160,23 +117,8 @@ private: | |||
| 160 | bool is_indexed = 0; | 117 | bool is_indexed = 0; |
| 161 | }; | 118 | }; |
| 162 | 119 | ||
| 163 | using ColorAttachments = std::array<View, Maxwell::NumRenderTargets>; | ||
| 164 | using ZetaAttachment = View; | ||
| 165 | |||
| 166 | using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>; | ||
| 167 | |||
| 168 | static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8; | ||
| 169 | static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); | ||
| 170 | |||
| 171 | void FlushWork(); | 120 | void FlushWork(); |
| 172 | 121 | ||
| 173 | /// @brief Updates the currently bound attachments | ||
| 174 | /// @param is_clear True when the framebuffer is updated as a clear | ||
| 175 | /// @return Bitfield of attachments being used as sampled textures | ||
| 176 | Texceptions UpdateAttachments(bool is_clear); | ||
| 177 | |||
| 178 | std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); | ||
| 179 | |||
| 180 | /// Sets up geometry buffers and state. | 122 | /// Sets up geometry buffers and state. |
| 181 | DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, | 123 | DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, |
| 182 | bool is_indexed, bool is_instanced); | 124 | bool is_indexed, bool is_instanced); |
| @@ -184,17 +126,12 @@ private: | |||
| 184 | /// Setup descriptors in the graphics pipeline. | 126 | /// Setup descriptors in the graphics pipeline. |
| 185 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); | 127 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); |
| 186 | 128 | ||
| 187 | void SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color, | ||
| 188 | const ZetaAttachment& zeta); | ||
| 189 | |||
| 190 | void UpdateDynamicStates(); | 129 | void UpdateDynamicStates(); |
| 191 | 130 | ||
| 192 | void BeginTransformFeedback(); | 131 | void BeginTransformFeedback(); |
| 193 | 132 | ||
| 194 | void EndTransformFeedback(); | 133 | void EndTransformFeedback(); |
| 195 | 134 | ||
| 196 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); | ||
| 197 | |||
| 198 | void SetupVertexArrays(BufferBindings& buffer_bindings); | 135 | void SetupVertexArrays(BufferBindings& buffer_bindings); |
| 199 | 136 | ||
| 200 | void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); | 137 | void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); |
| @@ -240,14 +177,6 @@ private: | |||
| 240 | 177 | ||
| 241 | void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); | 178 | void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); |
| 242 | 179 | ||
| 243 | void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry); | ||
| 244 | |||
| 245 | void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); | ||
| 246 | |||
| 247 | void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry); | ||
| 248 | |||
| 249 | void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); | ||
| 250 | |||
| 251 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | 180 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 252 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | 181 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 253 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); | 182 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); |
| @@ -264,18 +193,16 @@ private: | |||
| 264 | void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); | 193 | void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); |
| 265 | void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); | 194 | void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); |
| 266 | 195 | ||
| 267 | std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; | 196 | size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; |
| 268 | |||
| 269 | std::size_t CalculateComputeStreamBufferSize() const; | ||
| 270 | 197 | ||
| 271 | std::size_t CalculateVertexArraysSize() const; | 198 | size_t CalculateComputeStreamBufferSize() const; |
| 272 | 199 | ||
| 273 | std::size_t CalculateIndexBufferSize() const; | 200 | size_t CalculateVertexArraysSize() const; |
| 274 | 201 | ||
| 275 | std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry, | 202 | size_t CalculateIndexBufferSize() const; |
| 276 | const Tegra::Engines::ConstBufferInfo& buffer) const; | ||
| 277 | 203 | ||
| 278 | RenderPassParams GetRenderPassParams(Texceptions texceptions) const; | 204 | size_t CalculateConstBufferSize(const ConstBufferEntry& entry, |
| 205 | const Tegra::Engines::ConstBufferInfo& buffer) const; | ||
| 279 | 206 | ||
| 280 | VkBuffer DefaultBuffer(); | 207 | VkBuffer DefaultBuffer(); |
| 281 | 208 | ||
| @@ -290,18 +217,19 @@ private: | |||
| 290 | StateTracker& state_tracker; | 217 | StateTracker& state_tracker; |
| 291 | VKScheduler& scheduler; | 218 | VKScheduler& scheduler; |
| 292 | 219 | ||
| 220 | VKStreamBuffer stream_buffer; | ||
| 293 | VKStagingBufferPool staging_pool; | 221 | VKStagingBufferPool staging_pool; |
| 294 | VKDescriptorPool descriptor_pool; | 222 | VKDescriptorPool descriptor_pool; |
| 295 | VKUpdateDescriptorQueue update_descriptor_queue; | 223 | VKUpdateDescriptorQueue update_descriptor_queue; |
| 296 | VKRenderPassCache renderpass_cache; | 224 | BlitImageHelper blit_image; |
| 297 | QuadArrayPass quad_array_pass; | 225 | QuadArrayPass quad_array_pass; |
| 298 | QuadIndexedPass quad_indexed_pass; | 226 | QuadIndexedPass quad_indexed_pass; |
| 299 | Uint8Pass uint8_pass; | 227 | Uint8Pass uint8_pass; |
| 300 | 228 | ||
| 301 | VKTextureCache texture_cache; | 229 | TextureCacheRuntime texture_cache_runtime; |
| 230 | TextureCache texture_cache; | ||
| 302 | VKPipelineCache pipeline_cache; | 231 | VKPipelineCache pipeline_cache; |
| 303 | VKBufferCache buffer_cache; | 232 | VKBufferCache buffer_cache; |
| 304 | VKSamplerCache sampler_cache; | ||
| 305 | VKQueryCache query_cache; | 233 | VKQueryCache query_cache; |
| 306 | VKFenceManager fence_manager; | 234 | VKFenceManager fence_manager; |
| 307 | 235 | ||
| @@ -310,16 +238,11 @@ private: | |||
| 310 | vk::Event wfi_event; | 238 | vk::Event wfi_event; |
| 311 | VideoCommon::Shader::AsyncShaders async_shaders; | 239 | VideoCommon::Shader::AsyncShaders async_shaders; |
| 312 | 240 | ||
| 313 | ColorAttachments color_attachments; | 241 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |
| 314 | ZetaAttachment zeta_attachment; | 242 | std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; |
| 315 | 243 | boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles; | |
| 316 | std::vector<ImageView> sampled_views; | ||
| 317 | std::vector<ImageView> image_views; | ||
| 318 | 244 | ||
| 319 | u32 draw_counter = 0; | 245 | u32 draw_counter = 0; |
| 320 | |||
| 321 | // TODO(Rodrigo): Invalidate on image destruction | ||
| 322 | std::unordered_map<FramebufferCacheKey, vk::Framebuffer> framebuffer_cache; | ||
| 323 | }; | 246 | }; |
| 324 | 247 | ||
| 325 | } // namespace Vulkan | 248 | } // namespace Vulkan |
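The header swaps the heap-backed ImageView vectors for boost::container::static_vector members capped at MAX_IMAGE_VIEWS. A minimal sketch of why that container suits per-draw bookkeeping, assuming the counts never exceed the compile-time caps (exceeding a static_vector's capacity is an error, not a reallocation):

```cpp
// Sketch: static_vector keeps its storage inline, so per-draw clear()/push_back()
// never allocate. The capacity mirrors MAX_TEXTURES + MAX_IMAGES from the diff.
#include <cstddef>
#include <cstdint>

#include <boost/container/static_vector.hpp>

constexpr std::size_t MAX_TEXTURES = 192;
constexpr std::size_t MAX_IMAGES = 48;
constexpr std::size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;

using IndexVector = boost::container::static_vector<std::uint32_t, MAX_IMAGE_VIEWS>;

// Hypothetical collection pass; the real code pushes handle.image for each
// shader entry, as in SetupComputeImages above.
void CollectIndices(IndexVector& image_view_indices) {
    image_view_indices.clear();
    for (std::uint32_t index = 0; index < 4; ++index) {
        image_view_indices.push_back(index);
    }
}
```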
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp deleted file mode 100644 index e812c7dd6..000000000 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ /dev/null | |||
| @@ -1,158 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include <memory> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/cityhash.h" | ||
| 10 | #include "video_core/engines/maxwell_3d.h" | ||
| 11 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 14 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 15 | |||
| 16 | namespace Vulkan { | ||
| 17 | |||
| 18 | std::size_t RenderPassParams::Hash() const noexcept { | ||
| 19 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); | ||
| 20 | return static_cast<std::size_t>(hash); | ||
| 21 | } | ||
| 22 | |||
| 23 | bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept { | ||
| 24 | return std::memcmp(&rhs, this, sizeof *this) == 0; | ||
| 25 | } | ||
| 26 | |||
| 27 | VKRenderPassCache::VKRenderPassCache(const VKDevice& device_) : device{device_} {} | ||
| 28 | |||
| 29 | VKRenderPassCache::~VKRenderPassCache() = default; | ||
| 30 | |||
| 31 | VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { | ||
| 32 | const auto [pair, is_cache_miss] = cache.try_emplace(params); | ||
| 33 | auto& entry = pair->second; | ||
| 34 | if (is_cache_miss) { | ||
| 35 | entry = CreateRenderPass(params); | ||
| 36 | } | ||
| 37 | return *entry; | ||
| 38 | } | ||
| 39 | |||
| 40 | vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { | ||
| 41 | using namespace VideoCore::Surface; | ||
| 42 | const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments); | ||
| 43 | |||
| 44 | std::vector<VkAttachmentDescription> descriptors; | ||
| 45 | descriptors.reserve(num_attachments); | ||
| 46 | |||
| 47 | std::vector<VkAttachmentReference> color_references; | ||
| 48 | color_references.reserve(num_attachments); | ||
| 49 | |||
| 50 | for (std::size_t rt = 0; rt < num_attachments; ++rt) { | ||
| 51 | const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]); | ||
| 52 | const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format); | ||
| 53 | const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format); | ||
| 54 | ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", | ||
| 55 | static_cast<int>(pixel_format)); | ||
| 56 | |||
| 57 | // TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed. | ||
| 58 | const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0 | ||
| 59 | ? VK_IMAGE_LAYOUT_GENERAL | ||
| 60 | : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; | ||
| 61 | descriptors.push_back({ | ||
| 62 | .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, | ||
| 63 | .format = format.format, | ||
| 64 | .samples = VK_SAMPLE_COUNT_1_BIT, | ||
| 65 | .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 66 | .storeOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 67 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, | ||
| 68 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, | ||
| 69 | .initialLayout = color_layout, | ||
| 70 | .finalLayout = color_layout, | ||
| 71 | }); | ||
| 72 | |||
| 73 | color_references.push_back({ | ||
| 74 | .attachment = static_cast<u32>(rt), | ||
| 75 | .layout = color_layout, | ||
| 76 | }); | ||
| 77 | } | ||
| 78 | |||
| 79 | VkAttachmentReference zeta_attachment_ref; | ||
| 80 | const bool has_zeta = params.zeta_format != 0; | ||
| 81 | if (has_zeta) { | ||
| 82 | const auto guest_format = static_cast<Tegra::DepthFormat>(params.zeta_format); | ||
| 83 | const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format); | ||
| 84 | const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format); | ||
| 85 | ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", | ||
| 86 | static_cast<int>(pixel_format)); | ||
| 87 | |||
| 88 | const VkImageLayout zeta_layout = params.zeta_texception != 0 | ||
| 89 | ? VK_IMAGE_LAYOUT_GENERAL | ||
| 90 | : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; | ||
| 91 | descriptors.push_back({ | ||
| 92 | .flags = 0, | ||
| 93 | .format = format.format, | ||
| 94 | .samples = VK_SAMPLE_COUNT_1_BIT, | ||
| 95 | .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 96 | .storeOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 97 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 98 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 99 | .initialLayout = zeta_layout, | ||
| 100 | .finalLayout = zeta_layout, | ||
| 101 | }); | ||
| 102 | |||
| 103 | zeta_attachment_ref = { | ||
| 104 | .attachment = static_cast<u32>(num_attachments), | ||
| 105 | .layout = zeta_layout, | ||
| 106 | }; | ||
| 107 | } | ||
| 108 | |||
| 109 | const VkSubpassDescription subpass_description{ | ||
| 110 | .flags = 0, | ||
| 111 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 112 | .inputAttachmentCount = 0, | ||
| 113 | .pInputAttachments = nullptr, | ||
| 114 | .colorAttachmentCount = static_cast<u32>(color_references.size()), | ||
| 115 | .pColorAttachments = color_references.data(), | ||
| 116 | .pResolveAttachments = nullptr, | ||
| 117 | .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr, | ||
| 118 | .preserveAttachmentCount = 0, | ||
| 119 | .pPreserveAttachments = nullptr, | ||
| 120 | }; | ||
| 121 | |||
| 122 | VkAccessFlags access = 0; | ||
| 123 | VkPipelineStageFlags stage = 0; | ||
| 124 | if (!color_references.empty()) { | ||
| 125 | access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; | ||
| 126 | stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; | ||
| 127 | } | ||
| 128 | |||
| 129 | if (has_zeta) { | ||
| 130 | access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||
| 131 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; | ||
| 132 | stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; | ||
| 133 | } | ||
| 134 | |||
| 135 | const VkSubpassDependency subpass_dependency{ | ||
| 136 | .srcSubpass = VK_SUBPASS_EXTERNAL, | ||
| 137 | .dstSubpass = 0, | ||
| 138 | .srcStageMask = stage, | ||
| 139 | .dstStageMask = stage, | ||
| 140 | .srcAccessMask = 0, | ||
| 141 | .dstAccessMask = access, | ||
| 142 | .dependencyFlags = 0, | ||
| 143 | }; | ||
| 144 | |||
| 145 | return device.GetLogical().CreateRenderPass({ | ||
| 146 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, | ||
| 147 | .pNext = nullptr, | ||
| 148 | .flags = 0, | ||
| 149 | .attachmentCount = static_cast<u32>(descriptors.size()), | ||
| 150 | .pAttachments = descriptors.data(), | ||
| 151 | .subpassCount = 1, | ||
| 152 | .pSubpasses = &subpass_description, | ||
| 153 | .dependencyCount = 1, | ||
| 154 | .pDependencies = &subpass_dependency, | ||
| 155 | }); | ||
| 156 | } | ||
| 157 | |||
| 158 | } // namespace Vulkan | ||
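The deleted cache keyed render passes on the raw object representation of RenderPassParams, which is what the header's has_unique_object_representations and is_trivially_copyable assertions guaranteed. A condensed sketch of that pattern with a hypothetical Params type (simple byte mixing stands in for Common::CityHash64):

```cpp
#include <cstddef>
#include <cstring>
#include <type_traits>
#include <unordered_map>

struct Params {
    int width = 0;
    int height = 0;

    bool operator==(const Params& rhs) const noexcept {
        // Byte-wise comparison is only sound because of the asserts below.
        return std::memcmp(this, &rhs, sizeof *this) == 0;
    }
};
static_assert(std::has_unique_object_representations_v<Params>);
static_assert(std::is_trivially_copyable_v<Params>);

struct ParamsHash {
    std::size_t operator()(const Params& params) const noexcept {
        std::size_t hash = 0;
        const auto* bytes = reinterpret_cast<const unsigned char*>(&params);
        for (std::size_t i = 0; i < sizeof params; ++i) {
            hash = hash * 131 + bytes[i]; // CityHash64 in the original
        }
        return hash;
    }
};

// try_emplace performs a single lookup for both hits and misses, as in
// VKRenderPassCache::GetRenderPass above.
int GetOrCreate(std::unordered_map<Params, int, ParamsHash>& cache, const Params& params) {
    const auto [it, is_cache_miss] = cache.try_emplace(params);
    if (is_cache_miss) {
        it->second = 0; // CreateRenderPass(params) in the original
    }
    return it->second;
}
```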
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h deleted file mode 100644 index 652ecef7b..000000000 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ /dev/null | |||
| @@ -1,70 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <type_traits> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include <boost/container/static_vector.hpp> | ||
| 11 | #include <boost/functional/hash.hpp> | ||
| 12 | |||
| 13 | #include "video_core/engines/maxwell_3d.h" | ||
| 14 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 15 | #include "video_core/surface.h" | ||
| 16 | |||
| 17 | namespace Vulkan { | ||
| 18 | |||
| 19 | class VKDevice; | ||
| 20 | |||
| 21 | struct RenderPassParams { | ||
| 22 | std::array<u8, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_formats; | ||
| 23 | u8 num_color_attachments; | ||
| 24 | u8 texceptions; | ||
| 25 | |||
| 26 | u8 zeta_format; | ||
| 27 | u8 zeta_texception; | ||
| 28 | |||
| 29 | std::size_t Hash() const noexcept; | ||
| 30 | |||
| 31 | bool operator==(const RenderPassParams& rhs) const noexcept; | ||
| 32 | |||
| 33 | bool operator!=(const RenderPassParams& rhs) const noexcept { | ||
| 34 | return !operator==(rhs); | ||
| 35 | } | ||
| 36 | }; | ||
| 37 | static_assert(std::has_unique_object_representations_v<RenderPassParams>); | ||
| 38 | static_assert(std::is_trivially_copyable_v<RenderPassParams>); | ||
| 39 | static_assert(std::is_trivially_constructible_v<RenderPassParams>); | ||
| 40 | |||
| 41 | } // namespace Vulkan | ||
| 42 | |||
| 43 | namespace std { | ||
| 44 | |||
| 45 | template <> | ||
| 46 | struct hash<Vulkan::RenderPassParams> { | ||
| 47 | std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept { | ||
| 48 | return k.Hash(); | ||
| 49 | } | ||
| 50 | }; | ||
| 51 | |||
| 52 | } // namespace std | ||
| 53 | |||
| 54 | namespace Vulkan { | ||
| 55 | |||
| 56 | class VKRenderPassCache final { | ||
| 57 | public: | ||
| 58 | explicit VKRenderPassCache(const VKDevice& device_); | ||
| 59 | ~VKRenderPassCache(); | ||
| 60 | |||
| 61 | VkRenderPass GetRenderPass(const RenderPassParams& params); | ||
| 62 | |||
| 63 | private: | ||
| 64 | vk::RenderPass CreateRenderPass(const RenderPassParams& params) const; | ||
| 65 | |||
| 66 | const VKDevice& device; | ||
| 67 | std::unordered_map<RenderPassParams, vk::RenderPass> cache; | ||
| 68 | }; | ||
| 69 | |||
| 70 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp deleted file mode 100644 index b859691fa..000000000 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ /dev/null | |||
| @@ -1,83 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <unordered_map> | ||
| 6 | |||
| 7 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 8 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | ||
| 9 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 10 | #include "video_core/textures/texture.h" | ||
| 11 | |||
| 12 | using Tegra::Texture::TextureMipmapFilter; | ||
| 13 | |||
| 14 | namespace Vulkan { | ||
| 15 | |||
| 16 | namespace { | ||
| 17 | |||
| 18 | VkBorderColor ConvertBorderColor(std::array<float, 4> color) { | ||
| 19 | // TODO(Rodrigo): Manage integer border colors | ||
| 20 | if (color == std::array<float, 4>{0, 0, 0, 0}) { | ||
| 21 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; | ||
| 22 | } else if (color == std::array<float, 4>{0, 0, 0, 1}) { | ||
| 23 | return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; | ||
| 24 | } else if (color == std::array<float, 4>{1, 1, 1, 1}) { | ||
| 25 | return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; | ||
| 26 | } | ||
| 27 | if (color[0] + color[1] + color[2] > 1.35f) { | ||
| 28 | // If color elements are brighter than roughly 0.5 average, use white border | ||
| 29 | return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; | ||
| 30 | } else if (color[3] > 0.5f) { | ||
| 31 | return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; | ||
| 32 | } else { | ||
| 33 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | } // Anonymous namespace | ||
| 38 | |||
| 39 | VKSamplerCache::VKSamplerCache(const VKDevice& device_) : device{device_} {} | ||
| 40 | |||
| 41 | VKSamplerCache::~VKSamplerCache() = default; | ||
| 42 | |||
| 43 | vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { | ||
| 44 | const bool arbitrary_borders = device.IsExtCustomBorderColorSupported(); | ||
| 45 | const std::array color = tsc.GetBorderColor(); | ||
| 46 | |||
| 47 | VkSamplerCustomBorderColorCreateInfoEXT border{ | ||
| 48 | .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, | ||
| 49 | .pNext = nullptr, | ||
| 50 | .customBorderColor = {}, | ||
| 51 | .format = VK_FORMAT_UNDEFINED, | ||
| 52 | }; | ||
| 53 | std::memcpy(&border.customBorderColor, color.data(), sizeof(color)); | ||
| 54 | |||
| 55 | return device.GetLogical().CreateSampler({ | ||
| 56 | .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, | ||
| 57 | .pNext = arbitrary_borders ? &border : nullptr, | ||
| 58 | .flags = 0, | ||
| 59 | .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), | ||
| 60 | .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), | ||
| 61 | .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), | ||
| 62 | .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), | ||
| 63 | .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), | ||
| 64 | .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), | ||
| 65 | .mipLodBias = tsc.GetLodBias(), | ||
| 66 | .anisotropyEnable = | ||
| 67 | static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE), | ||
| 68 | .maxAnisotropy = tsc.GetMaxAnisotropy(), | ||
| 69 | .compareEnable = tsc.depth_compare_enabled, | ||
| 70 | .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), | ||
| 71 | .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(), | ||
| 72 | .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(), | ||
| 73 | .borderColor = | ||
| 74 | arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), | ||
| 75 | .unnormalizedCoordinates = VK_FALSE, | ||
| 76 | }); | ||
| 77 | } | ||
| 78 | |||
| 79 | VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const { | ||
| 80 | return *sampler; | ||
| 81 | } | ||
| 82 | |||
| 83 | } // namespace Vulkan | ||
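The deleted sampler cache also illustrates the usual Vulkan extension pattern: fill out an EXT struct and chain it through pNext only when the device exposes the extension. A stripped-down sketch, where supports_custom_border is a hypothetical flag standing in for device.IsExtCustomBorderColorSupported():

```cpp
#include <vulkan/vulkan.h>

// Sketch: select between the EXT custom border color and a core fallback enum.
VkSamplerCreateInfo MakeSamplerCreateInfo(
    bool supports_custom_border, const VkSamplerCustomBorderColorCreateInfoEXT* border) {
    VkSamplerCreateInfo ci{};
    ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
    // Chain the EXT struct only when available; otherwise the core enum must
    // approximate the requested border color, as ConvertBorderColor did above.
    ci.pNext = supports_custom_border ? border : nullptr;
    ci.borderColor = supports_custom_border ? VK_BORDER_COLOR_INT_CUSTOM_EXT
                                            : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
    return ci;
}
```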
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h deleted file mode 100644 index 3f22c4610..000000000 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ /dev/null | |||
| @@ -1,29 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 8 | #include "video_core/sampler_cache.h" | ||
| 9 | #include "video_core/textures/texture.h" | ||
| 10 | |||
| 11 | namespace Vulkan { | ||
| 12 | |||
| 13 | class VKDevice; | ||
| 14 | |||
| 15 | class VKSamplerCache final : public VideoCommon::SamplerCache<VkSampler, vk::Sampler> { | ||
| 16 | public: | ||
| 17 | explicit VKSamplerCache(const VKDevice& device_); | ||
| 18 | ~VKSamplerCache(); | ||
| 19 | |||
| 20 | protected: | ||
| 21 | vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; | ||
| 22 | |||
| 23 | VkSampler ToSamplerType(const vk::Sampler& sampler) const override; | ||
| 24 | |||
| 25 | private: | ||
| 26 | const VKDevice& device; | ||
| 27 | }; | ||
| 28 | |||
| 29 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 1a483dc71..c104c6fe3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 16 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 18 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | 18 | #include "video_core/renderer_vulkan/vk_state_tracker.h" |
| 19 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 19 | #include "video_core/renderer_vulkan/wrapper.h" | 20 | #include "video_core/renderer_vulkan/wrapper.h" |
| 20 | 21 | ||
| 21 | namespace Vulkan { | 22 | namespace Vulkan { |
| @@ -96,38 +97,39 @@ void VKScheduler::DispatchWork() { | |||
| 96 | AcquireNewChunk(); | 97 | AcquireNewChunk(); |
| 97 | } | 98 | } |
| 98 | 99 | ||
| 99 | void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, | 100 | void VKScheduler::RequestRenderpass(const Framebuffer* framebuffer) { |
| 100 | VkExtent2D render_area) { | 101 | const VkRenderPass renderpass = framebuffer->RenderPass(); |
| 101 | if (renderpass == state.renderpass && framebuffer == state.framebuffer && | 102 | const VkFramebuffer framebuffer_handle = framebuffer->Handle(); |
| 103 | const VkExtent2D render_area = framebuffer->RenderArea(); | ||
| 104 | if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer && | ||
| 102 | render_area.width == state.render_area.width && | 105 | render_area.width == state.render_area.width && |
| 103 | render_area.height == state.render_area.height) { | 106 | render_area.height == state.render_area.height) { |
| 104 | return; | 107 | return; |
| 105 | } | 108 | } |
| 106 | const bool end_renderpass = state.renderpass != nullptr; | 109 | EndRenderPass(); |
| 107 | state.renderpass = renderpass; | 110 | state.renderpass = renderpass; |
| 108 | state.framebuffer = framebuffer; | 111 | state.framebuffer = framebuffer_handle; |
| 109 | state.render_area = render_area; | 112 | state.render_area = render_area; |
| 110 | 113 | ||
| 111 | const VkRenderPassBeginInfo renderpass_bi{ | 114 | Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) { |
| 112 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, | 115 | const VkRenderPassBeginInfo renderpass_bi{ |
| 113 | .pNext = nullptr, | 116 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, |
| 114 | .renderPass = renderpass, | 117 | .pNext = nullptr, |
| 115 | .framebuffer = framebuffer, | 118 | .renderPass = renderpass, |
| 116 | .renderArea = | 119 | .framebuffer = framebuffer_handle, |
| 117 | { | 120 | .renderArea = |
| 118 | .offset = {.x = 0, .y = 0}, | 121 | { |
| 119 | .extent = render_area, | 122 | .offset = {.x = 0, .y = 0}, |
| 120 | }, | 123 | .extent = render_area, |
| 121 | .clearValueCount = 0, | 124 | }, |
| 122 | .pClearValues = nullptr, | 125 | .clearValueCount = 0, |
| 123 | }; | 126 | .pClearValues = nullptr, |
| 124 | 127 | }; | |
| 125 | Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) { | ||
| 126 | if (end_renderpass) { | ||
| 127 | cmdbuf.EndRenderPass(); | ||
| 128 | } | ||
| 129 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); | 128 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); |
| 130 | }); | 129 | }); |
| 130 | num_renderpass_images = framebuffer->NumImages(); | ||
| 131 | renderpass_images = framebuffer->Images(); | ||
| 132 | renderpass_image_ranges = framebuffer->ImageRanges(); | ||
| 131 | } | 133 | } |
| 132 | 134 | ||
| 133 | void VKScheduler::RequestOutsideRenderPassOperationContext() { | 135 | void VKScheduler::RequestOutsideRenderPassOperationContext() { |
| @@ -241,8 +243,37 @@ void VKScheduler::EndRenderPass() { | |||
| 241 | if (!state.renderpass) { | 243 | if (!state.renderpass) { |
| 242 | return; | 244 | return; |
| 243 | } | 245 | } |
| 246 | Record([num_images = num_renderpass_images, images = renderpass_images, | ||
| 247 | ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) { | ||
| 248 | std::array<VkImageMemoryBarrier, 9> barriers; | ||
| 249 | for (size_t i = 0; i < num_images; ++i) { | ||
| 250 | barriers[i] = VkImageMemoryBarrier{ | ||
| 251 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 252 | .pNext = nullptr, | ||
| 253 | .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||
| 254 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, | ||
| 255 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | | ||
| 256 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | ||
| 257 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||
| 258 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||
| 259 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, | ||
| 260 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 261 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 262 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 263 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 264 | .image = images[i], | ||
| 265 | .subresourceRange = ranges[i], | ||
| 266 | }; | ||
| 267 | } | ||
| 268 | cmdbuf.EndRenderPass(); | ||
| 269 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | | ||
| 270 | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | | ||
| 271 | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, | ||
| 272 | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, | ||
| 273 | vk::Span(barriers.data(), num_images)); | ||
| 274 | }); | ||
| 244 | state.renderpass = nullptr; | 275 | state.renderpass = nullptr; |
| 245 | Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); }); | 276 | num_renderpass_images = 0; |
| 246 | } | 277 | } |
| 247 | 278 | ||
| 248 | void VKScheduler::AcquireNewChunk() { | 279 | void VKScheduler::AcquireNewChunk() { |
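RequestRenderpass now builds the VkRenderPassBeginInfo inside the Record() callback, so all state is captured by value and nothing is recorded until the worker thread owns a command buffer. A minimal sketch of that deferred-recording pattern, with stand-in types (the real scheduler uses its own chunk allocator rather than std::function):

```cpp
#include <functional>
#include <utility>
#include <vector>

struct CommandBuffer {}; // stand-in for vk::CommandBuffer

class Recorder {
public:
    // Capture everything by value now; run against the real command buffer later.
    template <typename F>
    void Record(F&& func) {
        commands.emplace_back(std::forward<F>(func));
    }

    void Flush(CommandBuffer cmdbuf) {
        for (auto& command : commands) {
            command(cmdbuf); // executed later, possibly on a worker thread
        }
        commands.clear();
    }

private:
    std::vector<std::function<void(CommandBuffer)>> commands;
};
```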
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 6d3a5da0b..0a36c8fad 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | namespace Vulkan { | 17 | namespace Vulkan { |
| 18 | 18 | ||
| 19 | class CommandPool; | 19 | class CommandPool; |
| 20 | class Framebuffer; | ||
| 20 | class MasterSemaphore; | 21 | class MasterSemaphore; |
| 21 | class StateTracker; | 22 | class StateTracker; |
| 22 | class VKDevice; | 23 | class VKDevice; |
| @@ -52,8 +53,7 @@ public: | |||
| 52 | void DispatchWork(); | 53 | void DispatchWork(); |
| 53 | 54 | ||
| 54 | /// Requests to begin a renderpass. | 55 | /// Requests to begin a renderpass. |
| 55 | void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, | 56 | void RequestRenderpass(const Framebuffer* framebuffer); |
| 56 | VkExtent2D render_area); | ||
| 57 | 57 | ||
| 58 | /// Requests the current execution context to be able to execute operations only allowed outside | 58 | /// Requests the current execution context to be able to execute operations only allowed outside |
| 59 | /// of a renderpass. | 59 | /// of a renderpass. |
| @@ -62,6 +62,9 @@ public: | |||
| 62 | /// Binds a pipeline to the current execution context. | 62 | /// Binds a pipeline to the current execution context. |
| 63 | void BindGraphicsPipeline(VkPipeline pipeline); | 63 | void BindGraphicsPipeline(VkPipeline pipeline); |
| 64 | 64 | ||
| 65 | /// Invalidates current command buffer state except for render passes | ||
| 66 | void InvalidateState(); | ||
| 67 | |||
| 65 | /// Assigns the query cache. | 68 | /// Assigns the query cache. |
| 66 | void SetQueryCache(VKQueryCache& query_cache_) { | 69 | void SetQueryCache(VKQueryCache& query_cache_) { |
| 67 | query_cache = &query_cache_; | 70 | query_cache = &query_cache_; |
| @@ -170,8 +173,6 @@ private: | |||
| 170 | 173 | ||
| 171 | void AllocateNewContext(); | 174 | void AllocateNewContext(); |
| 172 | 175 | ||
| 173 | void InvalidateState(); | ||
| 174 | |||
| 175 | void EndPendingOperations(); | 176 | void EndPendingOperations(); |
| 176 | 177 | ||
| 177 | void EndRenderPass(); | 178 | void EndRenderPass(); |
| @@ -192,6 +193,11 @@ private: | |||
| 192 | std::thread worker_thread; | 193 | std::thread worker_thread; |
| 193 | 194 | ||
| 194 | State state; | 195 | State state; |
| 196 | |||
| 197 | u32 num_renderpass_images = 0; | ||
| 198 | std::array<VkImage, 9> renderpass_images{}; | ||
| 199 | std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{}; | ||
| 200 | |||
| 195 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; | 201 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; |
| 196 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; | 202 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; |
| 197 | std::mutex mutex; | 203 | std::mutex mutex; |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 72954d0e3..09d6f9f35 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -102,7 +102,7 @@ struct GenericVaryingDescription { | |||
| 102 | bool is_scalar = false; | 102 | bool is_scalar = false; |
| 103 | }; | 103 | }; |
| 104 | 104 | ||
| 105 | spv::Dim GetSamplerDim(const Sampler& sampler) { | 105 | spv::Dim GetSamplerDim(const SamplerEntry& sampler) { |
| 106 | ASSERT(!sampler.is_buffer); | 106 | ASSERT(!sampler.is_buffer); |
| 107 | switch (sampler.type) { | 107 | switch (sampler.type) { |
| 108 | case Tegra::Shader::TextureType::Texture1D: | 108 | case Tegra::Shader::TextureType::Texture1D: |
| @@ -119,7 +119,7 @@ spv::Dim GetSamplerDim(const Sampler& sampler) { | |||
| 119 | } | 119 | } |
| 120 | } | 120 | } |
| 121 | 121 | ||
| 122 | std::pair<spv::Dim, bool> GetImageDim(const Image& image) { | 122 | std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) { |
| 123 | switch (image.type) { | 123 | switch (image.type) { |
| 124 | case Tegra::Shader::ImageType::Texture1D: | 124 | case Tegra::Shader::ImageType::Texture1D: |
| 125 | return {spv::Dim::Dim1D, false}; | 125 | return {spv::Dim::Dim1D, false}; |
| @@ -980,7 +980,7 @@ private: | |||
| 980 | return binding; | 980 | return binding; |
| 981 | } | 981 | } |
| 982 | 982 | ||
| 983 | void DeclareImage(const Image& image, u32& binding) { | 983 | void DeclareImage(const ImageEntry& image, u32& binding) { |
| 984 | const auto [dim, arrayed] = GetImageDim(image); | 984 | const auto [dim, arrayed] = GetImageDim(image); |
| 985 | constexpr int depth = 0; | 985 | constexpr int depth = 0; |
| 986 | constexpr bool ms = false; | 986 | constexpr bool ms = false; |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index df1812514..ad91ad5de 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -21,10 +21,10 @@ class VKDevice; | |||
| 21 | namespace Vulkan { | 21 | namespace Vulkan { |
| 22 | 22 | ||
| 23 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 23 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 24 | using UniformTexelEntry = VideoCommon::Shader::Sampler; | 24 | using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; |
| 25 | using SamplerEntry = VideoCommon::Shader::Sampler; | 25 | using SamplerEntry = VideoCommon::Shader::SamplerEntry; |
| 26 | using StorageTexelEntry = VideoCommon::Shader::Image; | 26 | using StorageTexelEntry = VideoCommon::Shader::ImageEntry; |
| 27 | using ImageEntry = VideoCommon::Shader::Image; | 27 | using ImageEntry = VideoCommon::Shader::ImageEntry; |
| 28 | 28 | ||
| 29 | constexpr u32 DESCRIPTOR_SET = 0; | 29 | constexpr u32 DESCRIPTOR_SET = 0; |
| 30 | 30 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index c1a218d76..38a0be7f2 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp | |||
| @@ -13,18 +13,13 @@ | |||
| 13 | 13 | ||
| 14 | namespace Vulkan { | 14 | namespace Vulkan { |
| 15 | 15 | ||
| 16 | vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { | 16 | vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code) { |
| 17 | // Avoid undefined behavior by copying to a staging allocation | ||
| 18 | ASSERT(code_size % sizeof(u32) == 0); | ||
| 19 | const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); | ||
| 20 | std::memcpy(data.get(), code_data, code_size); | ||
| 21 | |||
| 22 | return device.GetLogical().CreateShaderModule({ | 17 | return device.GetLogical().CreateShaderModule({ |
| 23 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | 18 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| 24 | .pNext = nullptr, | 19 | .pNext = nullptr, |
| 25 | .flags = 0, | 20 | .flags = 0, |
| 26 | .codeSize = code_size, | 21 | .codeSize = static_cast<u32>(code.size_bytes()), |
| 27 | .pCode = data.get(), | 22 | .pCode = code.data(), |
| 28 | }); | 23 | }); |
| 29 | } | 24 | } |
| 30 | 25 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index d1d3f3cae..dce34a140 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h | |||
| @@ -4,6 +4,8 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <span> | ||
| 8 | |||
| 7 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 8 | #include "video_core/renderer_vulkan/wrapper.h" | 10 | #include "video_core/renderer_vulkan/wrapper.h" |
| 9 | 11 | ||
| @@ -11,6 +13,6 @@ namespace Vulkan { | |||
| 11 | 13 | ||
| 12 | class VKDevice; | 14 | class VKDevice; |
| 13 | 15 | ||
| 14 | vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); | 16 | vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code); |
| 15 | 17 | ||
| 16 | } // namespace Vulkan | 18 | } // namespace Vulkan |
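With BuildShader taking std::span<const u32>, the staging copy disappears: callers hand over correctly aligned words, and codeSize falls out of size_bytes(). A hypothetical call site (LoadSpirv and its contents are placeholders):

```cpp
#include <cstdint>
#include <vector>

#include "video_core/renderer_vulkan/vk_shader_util.h"

std::vector<std::uint32_t> LoadSpirv() {
    // Placeholder body: real code would hold a compiled SPIR-V module.
    return {0x07230203u}; // SPIR-V magic word only, not a valid module
}

void Example(const Vulkan::VKDevice& device) {
    const std::vector<std::uint32_t> code = LoadSpirv();
    // std::span<const u32> binds implicitly to the contiguous vector.
    const auto module = Vulkan::BuildShader(device, code);
}
```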
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 50164cc08..1779a2e30 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | ||
| 6 | #include <cstddef> | 7 | #include <cstddef> |
| 7 | #include <iterator> | 8 | #include <iterator> |
| 8 | 9 | ||
| @@ -29,21 +30,15 @@ using Table = Maxwell3D::DirtyState::Table; | |||
| 29 | using Flags = Maxwell3D::DirtyState::Flags; | 30 | using Flags = Maxwell3D::DirtyState::Flags; |
| 30 | 31 | ||
| 31 | Flags MakeInvalidationFlags() { | 32 | Flags MakeInvalidationFlags() { |
| 33 | static constexpr std::array INVALIDATION_FLAGS{ | ||
| 34 | Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, | ||
| 35 | StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, | ||
| 36 | DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, | ||
| 37 | }; | ||
| 32 | Flags flags{}; | 38 | Flags flags{}; |
| 33 | flags[Viewports] = true; | 39 | for (const int flag : INVALIDATION_FLAGS) { |
| 34 | flags[Scissors] = true; | 40 | flags[flag] = true; |
| 35 | flags[DepthBias] = true; | 41 | } |
| 36 | flags[BlendConstants] = true; | ||
| 37 | flags[DepthBounds] = true; | ||
| 38 | flags[StencilProperties] = true; | ||
| 39 | flags[CullMode] = true; | ||
| 40 | flags[DepthBoundsEnable] = true; | ||
| 41 | flags[DepthTestEnable] = true; | ||
| 42 | flags[DepthWriteEnable] = true; | ||
| 43 | flags[DepthCompareOp] = true; | ||
| 44 | flags[FrontFace] = true; | ||
| 45 | flags[StencilOp] = true; | ||
| 46 | flags[StencilTestEnable] = true; | ||
| 47 | return flags; | 42 | return flags; |
| 48 | } | 43 | } |
| 49 | 44 | ||
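MakeInvalidationFlags now drives the bitset from one constexpr list instead of fourteen assignments, so adding a dirty flag touches a single line. The shape of that pattern in isolation, with stand-in flag names:

```cpp
#include <array>
#include <bitset>

enum Dirty : int { Viewports, Scissors, DepthBias, NumFlags };

std::bitset<NumFlags> MakeInvalidationFlags() {
    // One authoritative list; the loop keeps the bitset in sync with it.
    static constexpr std::array INVALIDATION_FLAGS{Viewports, Scissors, DepthBias};
    std::bitset<NumFlags> flags{};
    for (const int flag : INVALIDATION_FLAGS) {
        flags[flag] = true;
    }
    return flags;
}
```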
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 1de789e57..c335d2bdf 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h | |||
| @@ -52,6 +52,14 @@ public: | |||
| 52 | current_topology = INVALID_TOPOLOGY; | 52 | current_topology = INVALID_TOPOLOGY; |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | void InvalidateViewports() { | ||
| 56 | flags[Dirty::Viewports] = true; | ||
| 57 | } | ||
| 58 | |||
| 59 | void InvalidateScissors() { | ||
| 60 | flags[Dirty::Scissors] = true; | ||
| 61 | } | ||
| 62 | |||
| 55 | bool TouchViewports() { | 63 | bool TouchViewports() { |
| 56 | return Exchange(Dirty::Viewports, false); | 64 | return Exchange(Dirty::Viewports, false); |
| 57 | } | 65 | } |
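The new InvalidateViewports/InvalidateScissors helpers pair with the existing Touch* accessors: one side marks a state dirty, the other consumes the mark exactly once. A minimal sketch of that exchange idiom, assuming a plain bitset (the real tracker indexes flags by dirty-state table entries):

```cpp
#include <bitset>
#include <cstddef>

class MiniTracker {
public:
    void InvalidateViewports() {
        flags[kViewports] = true;
    }

    // Returns true at most once per invalidation, clearing the flag as it reads.
    bool TouchViewports() {
        return Exchange(kViewports, false);
    }

private:
    static constexpr std::size_t kViewports = 0;

    bool Exchange(std::size_t flag, bool value) {
        const bool is_dirty = flags[flag];
        flags[flag] = value;
        return is_dirty;
    }

    std::bitset<8> flags;
};
```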
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 1b59612b9..419cb154d 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp | |||
| @@ -19,6 +19,10 @@ namespace Vulkan { | |||
| 19 | 19 | ||
| 20 | namespace { | 20 | namespace { |
| 21 | 21 | ||
| 22 | constexpr VkBufferUsageFlags BUFFER_USAGE = | ||
| 23 | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | | ||
| 24 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; | ||
| 25 | |||
| 22 | constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; | 26 | constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; |
| 23 | constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; | 27 | constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; |
| 24 | 28 | ||
| @@ -56,17 +60,16 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties, | |||
| 56 | 60 | ||
| 57 | } // Anonymous namespace | 61 | } // Anonymous namespace |
| 58 | 62 | ||
| 59 | VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_, | 63 | VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_) |
| 60 | VkBufferUsageFlags usage) | ||
| 61 | : device{device_}, scheduler{scheduler_} { | 64 | : device{device_}, scheduler{scheduler_} { |
| 62 | CreateBuffers(usage); | 65 | CreateBuffers(); |
| 63 | ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); | 66 | ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); |
| 64 | ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); | 67 | ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); |
| 65 | } | 68 | } |
| 66 | 69 | ||
| 67 | VKStreamBuffer::~VKStreamBuffer() = default; | 70 | VKStreamBuffer::~VKStreamBuffer() = default; |
| 68 | 71 | ||
| 69 | std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { | 72 | std::pair<u8*, u64> VKStreamBuffer::Map(u64 size, u64 alignment) { |
| 70 | ASSERT(size <= stream_buffer_size); | 73 | ASSERT(size <= stream_buffer_size); |
| 71 | mapped_size = size; | 74 | mapped_size = size; |
| 72 | 75 | ||
| @@ -76,7 +79,6 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { | |||
| 76 | 79 | ||
| 77 | WaitPendingOperations(offset); | 80 | WaitPendingOperations(offset); |
| 78 | 81 | ||
| 79 | bool invalidated = false; | ||
| 80 | if (offset + size > stream_buffer_size) { | 82 | if (offset + size > stream_buffer_size) { |
| 81 | // The buffer would overflow; save the number of used watches and reset the state. | 83 | // The buffer would overflow; save the number of used watches and reset the state. |
| 82 | invalidation_mark = current_watch_cursor; | 84 | invalidation_mark = current_watch_cursor; |
| @@ -90,11 +92,9 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { | |||
| 90 | 92 | ||
| 91 | // Ensure that we don't wait for uncommitted fences. | 93 | // Ensure that we don't wait for uncommitted fences. |
| 92 | scheduler.Flush(); | 94 | scheduler.Flush(); |
| 93 | |||
| 94 | invalidated = true; | ||
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | return {memory.Map(offset, size), offset, invalidated}; | 97 | return std::make_pair(memory.Map(offset, size), offset); |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | void VKStreamBuffer::Unmap(u64 size) { | 100 | void VKStreamBuffer::Unmap(u64 size) { |
| @@ -113,7 +113,7 @@ void VKStreamBuffer::Unmap(u64 size) { | |||
| 113 | watch.tick = scheduler.CurrentTick(); | 113 | watch.tick = scheduler.CurrentTick(); |
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { | 116 | void VKStreamBuffer::CreateBuffers() { |
| 117 | const auto memory_properties = device.GetPhysical().GetMemoryProperties(); | 117 | const auto memory_properties = device.GetPhysical().GetMemoryProperties(); |
| 118 | const u32 preferred_type = GetMemoryType(memory_properties); | 118 | const u32 preferred_type = GetMemoryType(memory_properties); |
| 119 | const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; | 119 | const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; |
| @@ -127,7 +127,7 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { | |||
| 127 | .pNext = nullptr, | 127 | .pNext = nullptr, |
| 128 | .flags = 0, | 128 | .flags = 0, |
| 129 | .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), | 129 | .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), |
| 130 | .usage = usage, | 130 | .usage = BUFFER_USAGE, |
| 131 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 131 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 132 | .queueFamilyIndexCount = 0, | 132 | .queueFamilyIndexCount = 0, |
| 133 | .pQueueFamilyIndices = nullptr, | 133 | .pQueueFamilyIndices = nullptr, |
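With the usage flags now fixed at construction and the invalidation boolean dropped from Map, the stream buffer interface reduces to a mapping plus an offset. A hypothetical upload helper over the new interface (the Upload name and the template are illustrative, not code from this change):

    #include <cstdint>
    #include <cstring>

    using u8 = std::uint8_t;
    using u64 = std::uint64_t;

    template <typename StreamBuffer> // any type with the new Map/Unmap pair
    u64 Upload(StreamBuffer& stream, const void* data, u64 size, u64 alignment) {
        const auto [mapped, offset] = stream.Map(size, alignment);
        std::memcpy(mapped, data, size); // write through the host-visible mapping
        stream.Unmap(size);              // fence-watch the written region
        return offset;                   // offset to bind when recording the draw
    }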
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 5e15ad78f..1428f77bf 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <optional> | 7 | #include <optional> |
| 8 | #include <tuple> | 8 | #include <utility> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| @@ -19,17 +19,15 @@ class VKScheduler; | |||
| 19 | 19 | ||
| 20 | class VKStreamBuffer final { | 20 | class VKStreamBuffer final { |
| 21 | public: | 21 | public: |
| 22 | explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, | 22 | explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler); |
| 23 | VkBufferUsageFlags usage); | ||
| 24 | ~VKStreamBuffer(); | 23 | ~VKStreamBuffer(); |
| 25 | 24 | ||
| 26 | /** | 25 | /** |
| 27 | * Reserves a region of memory from the stream buffer. | 26 | * Reserves a region of memory from the stream buffer. |
| 28 | * @param size Size to reserve. | 27 | * @param size Size to reserve. |
| 29 | * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer | 28 | * @returns A pair of a raw memory pointer (with offset added) and the buffer offset. |
| 30 | * offset and a boolean that's true when buffer has been invalidated. | ||
| 31 | */ | 29 | */ |
| 32 | std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment); | 30 | std::pair<u8*, u64> Map(u64 size, u64 alignment); |
| 33 | 31 | ||
| 34 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. | 32 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. |
| 35 | void Unmap(u64 size); | 33 | void Unmap(u64 size); |
| @@ -49,7 +47,7 @@ private: | |||
| 49 | }; | 47 | }; |
| 50 | 48 | ||
| 51 | /// Creates Vulkan buffer handles committing the required memory. | 49 | /// Creates Vulkan buffer handles committing the required memory. |
| 52 | void CreateBuffers(VkBufferUsageFlags usage); | 50 | void CreateBuffers(); |
| 53 | 51 | ||
| 54 | /// Increases the amount of watches available. | 52 | /// Increases the amount of watches available. |
| 55 | void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); | 53 | void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index ae2e3322c..261808391 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -4,614 +4,1103 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cstddef> | 7 | #include <span> |
| 8 | #include <cstring> | ||
| 9 | #include <memory> | ||
| 10 | #include <variant> | ||
| 11 | #include <vector> | 8 | #include <vector> |
| 12 | 9 | ||
| 13 | #include "common/assert.h" | 10 | #include "video_core/engines/fermi_2d.h" |
| 14 | #include "common/common_types.h" | 11 | #include "video_core/renderer_vulkan/blit_image.h" |
| 15 | #include "core/core.h" | ||
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 17 | #include "video_core/morton.h" | ||
| 18 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 12 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 19 | #include "video_core/renderer_vulkan/vk_device.h" | 13 | #include "video_core/renderer_vulkan/vk_device.h" |
| 20 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||
| 21 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||
| 22 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 23 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 15 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 24 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 16 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 25 | #include "video_core/renderer_vulkan/wrapper.h" | 17 | #include "video_core/renderer_vulkan/wrapper.h" |
| 26 | #include "video_core/surface.h" | ||
| 27 | 18 | ||
| 28 | namespace Vulkan { | 19 | namespace Vulkan { |
| 29 | 20 | ||
| 30 | using VideoCore::MortonSwizzle; | 21 | using Tegra::Engines::Fermi2D; |
| 31 | using VideoCore::MortonSwizzleMode; | ||
| 32 | |||
| 33 | using Tegra::Texture::SwizzleSource; | 22 | using Tegra::Texture::SwizzleSource; |
| 34 | using VideoCore::Surface::PixelFormat; | 23 | using Tegra::Texture::TextureMipmapFilter; |
| 35 | using VideoCore::Surface::SurfaceTarget; | 24 | using VideoCommon::BufferImageCopy; |
| 25 | using VideoCommon::ImageInfo; | ||
| 26 | using VideoCommon::ImageType; | ||
| 27 | using VideoCommon::SubresourceRange; | ||
| 28 | using VideoCore::Surface::IsPixelFormatASTC; | ||
| 36 | 29 | ||
| 37 | namespace { | 30 | namespace { |
| 38 | 31 | ||
| 39 | VkImageType SurfaceTargetToImage(SurfaceTarget target) { | 32 | constexpr std::array ATTACHMENT_REFERENCES{ |
| 40 | switch (target) { | 33 | VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, |
| 41 | case SurfaceTarget::Texture1D: | 34 | VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, |
| 42 | case SurfaceTarget::Texture1DArray: | 35 | VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, |
| 36 | VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 37 | VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 38 | VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 39 | VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 40 | VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 41 | VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 42 | }; | ||
| 43 | |||
| 44 | constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | ||
| 45 | if (color == std::array<float, 4>{0, 0, 0, 0}) { | ||
| 46 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; | ||
| 47 | } else if (color == std::array<float, 4>{0, 0, 0, 1}) { | ||
| 48 | return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; | ||
| 49 | } else if (color == std::array<float, 4>{1, 1, 1, 1}) { | ||
| 50 | return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; | ||
| 51 | } | ||
| 52 | if (color[0] + color[1] + color[2] > 1.35f) { | ||
| 53 | // If color elements are brighter than roughly 0.5 average, use white border | ||
| 54 | return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; | ||
| 55 | } else if (color[3] > 0.5f) { | ||
| 56 | return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; | ||
| 57 | } else { | ||
| 58 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
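Because ConvertBorderColor is constexpr, the fallback heuristic can be spot-checked at compile time. Two illustrative cases (the color values are chosen here, not taken from the source, and assume the function and the Vulkan headers are in scope): a bright color that misses the exact matches lands in the white branch, and a dark but mostly opaque color picks opaque black:

    static_assert(ConvertBorderColor({0.9f, 0.9f, 0.4f, 1.0f}) ==
                  VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE); // 0.9 + 0.9 + 0.4 = 2.2 > 1.35
    static_assert(ConvertBorderColor({0.1f, 0.1f, 0.1f, 0.7f}) ==
                  VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK); // rgb sum 0.3, alpha 0.7 > 0.5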
| 62 | [[nodiscard]] VkImageType ConvertImageType(const ImageType type) { | ||
| 63 | switch (type) { | ||
| 64 | case ImageType::e1D: | ||
| 43 | return VK_IMAGE_TYPE_1D; | 65 | return VK_IMAGE_TYPE_1D; |
| 44 | case SurfaceTarget::Texture2D: | 66 | case ImageType::e2D: |
| 45 | case SurfaceTarget::Texture2DArray: | 67 | case ImageType::Linear: |
| 46 | case SurfaceTarget::TextureCubemap: | ||
| 47 | case SurfaceTarget::TextureCubeArray: | ||
| 48 | return VK_IMAGE_TYPE_2D; | 68 | return VK_IMAGE_TYPE_2D; |
| 49 | case SurfaceTarget::Texture3D: | 69 | case ImageType::e3D: |
| 50 | return VK_IMAGE_TYPE_3D; | 70 | return VK_IMAGE_TYPE_3D; |
| 51 | case SurfaceTarget::TextureBuffer: | 71 | case ImageType::Buffer: |
| 52 | UNREACHABLE(); | 72 | break; |
| 53 | return {}; | ||
| 54 | } | 73 | } |
| 55 | UNREACHABLE_MSG("Unknown texture target={}", target); | 74 | UNREACHABLE_MSG("Invalid image type={}", type); |
| 56 | return {}; | 75 | return {}; |
| 57 | } | 76 | } |
| 58 | 77 | ||
| 59 | VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { | 78 | [[nodiscard]] VkSampleCountFlagBits ConvertSampleCount(u32 num_samples) { |
| 60 | if (pixel_format < PixelFormat::MaxColorFormat) { | 79 | switch (num_samples) { |
| 61 | return VK_IMAGE_ASPECT_COLOR_BIT; | 80 | case 1: |
| 62 | } else if (pixel_format < PixelFormat::MaxDepthFormat) { | 81 | return VK_SAMPLE_COUNT_1_BIT; |
| 63 | return VK_IMAGE_ASPECT_DEPTH_BIT; | 82 | case 2: |
| 64 | } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { | 83 | return VK_SAMPLE_COUNT_2_BIT; |
| 65 | return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; | 84 | case 4: |
| 66 | } else { | 85 | return VK_SAMPLE_COUNT_4_BIT; |
| 67 | UNREACHABLE_MSG("Invalid pixel format={}", pixel_format); | 86 | case 8: |
| 68 | return VK_IMAGE_ASPECT_COLOR_BIT; | 87 | return VK_SAMPLE_COUNT_8_BIT; |
| 88 | case 16: | ||
| 89 | return VK_SAMPLE_COUNT_16_BIT; | ||
| 90 | default: | ||
| 91 | UNREACHABLE_MSG("Invalid number of samples={}", num_samples); | ||
| 92 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 69 | } | 93 | } |
| 70 | } | 94 | } |
| 71 | 95 | ||
| 72 | VkImageViewType GetImageViewType(SurfaceTarget target) { | 96 | [[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const VKDevice& device, const ImageInfo& info) { |
| 73 | switch (target) { | 97 | const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, info.format); |
| 74 | case SurfaceTarget::Texture1D: | 98 | VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; |
| 75 | return VK_IMAGE_VIEW_TYPE_1D; | 99 | if (info.type == ImageType::e2D && info.resources.layers >= 6 && |
| 76 | case SurfaceTarget::Texture2D: | 100 | info.size.width == info.size.height) { |
| 77 | return VK_IMAGE_VIEW_TYPE_2D; | 101 | flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; |
| 78 | case SurfaceTarget::Texture3D: | ||
| 79 | return VK_IMAGE_VIEW_TYPE_3D; | ||
| 80 | case SurfaceTarget::Texture1DArray: | ||
| 81 | return VK_IMAGE_VIEW_TYPE_1D_ARRAY; | ||
| 82 | case SurfaceTarget::Texture2DArray: | ||
| 83 | return VK_IMAGE_VIEW_TYPE_2D_ARRAY; | ||
| 84 | case SurfaceTarget::TextureCubemap: | ||
| 85 | return VK_IMAGE_VIEW_TYPE_CUBE; | ||
| 86 | case SurfaceTarget::TextureCubeArray: | ||
| 87 | return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; | ||
| 88 | case SurfaceTarget::TextureBuffer: | ||
| 89 | break; | ||
| 90 | } | 102 | } |
| 91 | UNREACHABLE(); | 103 | if (info.type == ImageType::e3D) { |
| 92 | return {}; | 104 | flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; |
| 93 | } | 105 | } |
| 94 | 106 | VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | | |
| 95 | vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, | 107 | VK_IMAGE_USAGE_SAMPLED_BIT; |
| 96 | std::size_t host_memory_size) { | 108 | if (format_info.attachable) { |
| 97 | // TODO(Rodrigo): Move texture buffer creation to the buffer cache | 109 | switch (VideoCore::Surface::GetFormatType(info.format)) { |
| 98 | return device.GetLogical().CreateBuffer({ | 110 | case VideoCore::Surface::SurfaceType::ColorTexture: |
| 99 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 111 | usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; |
| 112 | break; | ||
| 113 | case VideoCore::Surface::SurfaceType::Depth: | ||
| 114 | case VideoCore::Surface::SurfaceType::DepthStencil: | ||
| 115 | usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; | ||
| 116 | break; | ||
| 117 | default: | ||
| 118 | UNREACHABLE_MSG("Invalid surface type"); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | if (format_info.storage) { | ||
| 122 | usage |= VK_IMAGE_USAGE_STORAGE_BIT; | ||
| 123 | } | ||
| 124 | const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples); | ||
| 125 | return VkImageCreateInfo{ | ||
| 126 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | ||
| 100 | .pNext = nullptr, | 127 | .pNext = nullptr, |
| 101 | .flags = 0, | 128 | .flags = flags, |
| 102 | .size = static_cast<VkDeviceSize>(host_memory_size), | 129 | .imageType = ConvertImageType(info.type), |
| 103 | .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | | 130 | .format = format_info.format, |
| 104 | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | | 131 | .extent = |
| 105 | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | 132 | { |
| 133 | .width = info.size.width >> samples_x, | ||
| 134 | .height = info.size.height >> samples_y, | ||
| 135 | .depth = info.size.depth, | ||
| 136 | }, | ||
| 137 | .mipLevels = static_cast<u32>(info.resources.levels), | ||
| 138 | .arrayLayers = static_cast<u32>(info.resources.layers), | ||
| 139 | .samples = ConvertSampleCount(info.num_samples), | ||
| 140 | .tiling = VK_IMAGE_TILING_OPTIMAL, | ||
| 141 | .usage = usage, | ||
| 106 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 142 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 107 | .queueFamilyIndexCount = 0, | 143 | .queueFamilyIndexCount = 0, |
| 108 | .pQueueFamilyIndices = nullptr, | 144 | .pQueueFamilyIndices = nullptr, |
| 109 | }); | 145 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| 110 | } | ||
| 111 | |||
| 112 | VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, | ||
| 113 | const SurfaceParams& params, VkBuffer buffer, | ||
| 114 | std::size_t host_memory_size) { | ||
| 115 | ASSERT(params.IsBuffer()); | ||
| 116 | |||
| 117 | return { | ||
| 118 | .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, | ||
| 119 | .pNext = nullptr, | ||
| 120 | .flags = 0, | ||
| 121 | .buffer = buffer, | ||
| 122 | .format = | ||
| 123 | MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format, | ||
| 124 | .offset = 0, | ||
| 125 | .range = static_cast<VkDeviceSize>(host_memory_size), | ||
| 126 | }; | 146 | }; |
| 127 | } | 147 | } |
| 128 | 148 | ||
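MakeImageCreateInfo decides cube compatibility purely by shape: a square 2D image with at least six layers may later be viewed as a cubemap, so the flag is set eagerly rather than when a cube view is first requested. The same predicate as a standalone sketch (the helper name is hypothetical):

    #include <cstdint>

    // Mirrors the flag condition above: 2D, six or more layers, and square.
    constexpr bool IsCubeCompatible(std::uint32_t width, std::uint32_t height,
                                    std::int32_t layers, bool is_2d) {
        return is_2d && layers >= 6 && width == height;
    }
    static_assert(IsCubeCompatible(256, 256, 6, true));
    static_assert(!IsCubeCompatible(256, 128, 6, true)); // not square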
| 129 | VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { | 149 | [[nodiscard]] vk::Image MakeImage(const VKDevice& device, const ImageInfo& info) { |
| 130 | ASSERT(!params.IsBuffer()); | 150 | if (info.type == ImageType::Buffer) { |
| 131 | 151 | return vk::Image{}; | |
| 132 | const auto [format, attachable, storage] = | 152 | } |
| 133 | MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); | 153 | return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); |
| 154 | } | ||
| 134 | 155 | ||
| 135 | VkImageCreateInfo ci{ | 156 | [[nodiscard]] vk::Buffer MakeBuffer(const VKDevice& device, const ImageInfo& info) { |
| 136 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | 157 | if (info.type != ImageType::Buffer) { |
| 158 | return vk::Buffer{}; | ||
| 159 | } | ||
| 160 | const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); | ||
| 161 | return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 162 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 137 | .pNext = nullptr, | 163 | .pNext = nullptr, |
| 138 | .flags = 0, | 164 | .flags = 0, |
| 139 | .imageType = SurfaceTargetToImage(params.target), | 165 | .size = info.size.width * bytes_per_block, |
| 140 | .format = format, | 166 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |
| 141 | .extent = {}, | 167 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | |
| 142 | .mipLevels = params.num_levels, | 168 | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, |
| 143 | .arrayLayers = static_cast<u32>(params.GetNumLayers()), | ||
| 144 | .samples = VK_SAMPLE_COUNT_1_BIT, | ||
| 145 | .tiling = VK_IMAGE_TILING_OPTIMAL, | ||
| 146 | .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | | ||
| 147 | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, | ||
| 148 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 169 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 149 | .queueFamilyIndexCount = 0, | 170 | .queueFamilyIndexCount = 0, |
| 150 | .pQueueFamilyIndices = nullptr, | 171 | .pQueueFamilyIndices = nullptr, |
| 151 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | 172 | }); |
| 152 | }; | ||
| 153 | if (attachable) { | ||
| 154 | ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | ||
| 155 | : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; | ||
| 156 | } | ||
| 157 | if (storage) { | ||
| 158 | ci.usage |= VK_IMAGE_USAGE_STORAGE_BIT; | ||
| 159 | } | ||
| 160 | |||
| 161 | switch (params.target) { | ||
| 162 | case SurfaceTarget::TextureCubemap: | ||
| 163 | case SurfaceTarget::TextureCubeArray: | ||
| 164 | ci.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; | ||
| 165 | [[fallthrough]]; | ||
| 166 | case SurfaceTarget::Texture1D: | ||
| 167 | case SurfaceTarget::Texture1DArray: | ||
| 168 | case SurfaceTarget::Texture2D: | ||
| 169 | case SurfaceTarget::Texture2DArray: | ||
| 170 | ci.extent = {params.width, params.height, 1}; | ||
| 171 | break; | ||
| 172 | case SurfaceTarget::Texture3D: | ||
| 173 | ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; | ||
| 174 | ci.extent = {params.width, params.height, params.depth}; | ||
| 175 | break; | ||
| 176 | case SurfaceTarget::TextureBuffer: | ||
| 177 | UNREACHABLE(); | ||
| 178 | } | ||
| 179 | |||
| 180 | return ci; | ||
| 181 | } | 173 | } |
| 182 | 174 | ||
| 183 | u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, | 175 | [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { |
| 184 | SwizzleSource w_source) { | 176 | switch (VideoCore::Surface::GetFormatType(format)) { |
| 185 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | 177 | case VideoCore::Surface::SurfaceType::ColorTexture: |
| 186 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | 178 | return VK_IMAGE_ASPECT_COLOR_BIT; |
| 179 | case VideoCore::Surface::SurfaceType::Depth: | ||
| 180 | return VK_IMAGE_ASPECT_DEPTH_BIT; | ||
| 181 | case VideoCore::Surface::SurfaceType::DepthStencil: | ||
| 182 | return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; | ||
| 183 | default: | ||
| 184 | UNREACHABLE_MSG("Invalid surface type"); | ||
| 185 | return VkImageAspectFlags{}; | ||
| 186 | } | ||
| 187 | } | 187 | } |
| 188 | 188 | ||
| 189 | } // Anonymous namespace | 189 | [[nodiscard]] VkImageAspectFlags ImageViewAspectMask(const VideoCommon::ImageViewInfo& info) { |
| 190 | 190 | if (info.IsRenderTarget()) { | |
| 191 | CachedSurface::CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, | 191 | return ImageAspectMask(info.format); |
| 192 | VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, | ||
| 193 | GPUVAddr gpu_addr_, const SurfaceParams& params_) | ||
| 194 | : SurfaceBase<View>{gpu_addr_, params_, device_.IsOptimalAstcSupported()}, device{device_}, | ||
| 195 | memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{staging_pool_} { | ||
| 196 | if (params.IsBuffer()) { | ||
| 197 | buffer = CreateBuffer(device, params, host_memory_size); | ||
| 198 | commit = memory_manager.Commit(buffer, false); | ||
| 199 | |||
| 200 | const auto buffer_view_ci = | ||
| 201 | GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size); | ||
| 202 | format = buffer_view_ci.format; | ||
| 203 | |||
| 204 | buffer_view = device.GetLogical().CreateBufferView(buffer_view_ci); | ||
| 205 | } else { | ||
| 206 | const auto image_ci = GenerateImageCreateInfo(device, params); | ||
| 207 | format = image_ci.format; | ||
| 208 | |||
| 209 | image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format)); | ||
| 210 | commit = memory_manager.Commit(image->GetHandle(), false); | ||
| 211 | } | 192 | } |
| 212 | 193 | const bool is_first = info.Swizzle()[0] == SwizzleSource::R; | |
| 213 | // TODO(Rodrigo): Move this to a virtual function. | 194 | switch (info.format) { |
| 214 | u32 num_layers = 1; | 195 | case PixelFormat::D24_UNORM_S8_UINT: |
| 215 | if (params.is_layered || params.target == SurfaceTarget::Texture3D) { | 196 | case PixelFormat::D32_FLOAT_S8_UINT: |
| 216 | num_layers = params.depth; | 197 | return is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; |
| 198 | case PixelFormat::S8_UINT_D24_UNORM: | ||
| 199 | return is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; | ||
| 200 | case PixelFormat::D16_UNORM: | ||
| 201 | case PixelFormat::D32_FLOAT: | ||
| 202 | return VK_IMAGE_ASPECT_DEPTH_BIT; | ||
| 203 | default: | ||
| 204 | return VK_IMAGE_ASPECT_COLOR_BIT; | ||
| 217 | } | 205 | } |
| 218 | main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels)); | ||
| 219 | } | 206 | } |
| 220 | 207 | ||
| 221 | CachedSurface::~CachedSurface() = default; | 208 | [[nodiscard]] VkAttachmentDescription AttachmentDescription(const VKDevice& device, |
| 222 | 209 | const ImageView* image_view) { | |
| 223 | void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { | 210 | const auto pixel_format = image_view->format; |
| 224 | // To upload data we have to be outside of a renderpass | 211 | return VkAttachmentDescription{ |
| 225 | scheduler.RequestOutsideRenderPassOperationContext(); | 212 | .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, |
| 213 | .format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format).format, | ||
| 214 | .samples = image_view->Samples(), | ||
| 215 | .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 216 | .storeOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 217 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 218 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 219 | .initialLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 220 | .finalLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 221 | }; | ||
| 222 | } | ||
| 226 | 223 | ||
| 227 | if (params.IsBuffer()) { | 224 | [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { |
| 228 | UploadBuffer(staging_buffer); | 225 | switch (swizzle) { |
| 229 | } else { | 226 | case SwizzleSource::Zero: |
| 230 | UploadImage(staging_buffer); | 227 | return VK_COMPONENT_SWIZZLE_ZERO; |
| 228 | case SwizzleSource::R: | ||
| 229 | return VK_COMPONENT_SWIZZLE_R; | ||
| 230 | case SwizzleSource::G: | ||
| 231 | return VK_COMPONENT_SWIZZLE_G; | ||
| 232 | case SwizzleSource::B: | ||
| 233 | return VK_COMPONENT_SWIZZLE_B; | ||
| 234 | case SwizzleSource::A: | ||
| 235 | return VK_COMPONENT_SWIZZLE_A; | ||
| 236 | case SwizzleSource::OneFloat: | ||
| 237 | case SwizzleSource::OneInt: | ||
| 238 | return VK_COMPONENT_SWIZZLE_ONE; | ||
| 231 | } | 239 | } |
| 240 | UNREACHABLE_MSG("Invalid swizzle={}", swizzle); | ||
| 241 | return VK_COMPONENT_SWIZZLE_ZERO; | ||
| 232 | } | 242 | } |
| 233 | 243 | ||
| 234 | void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { | 244 | [[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { |
| 235 | UNIMPLEMENTED_IF(params.IsBuffer()); | 245 | switch (type) { |
| 236 | 246 | case VideoCommon::ImageViewType::e1D: | |
| 237 | if (params.pixel_format == PixelFormat::A1B5G5R5_UNORM) { | 247 | return VK_IMAGE_VIEW_TYPE_1D; |
| 238 | LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); | 248 | case VideoCommon::ImageViewType::e2D: |
| 249 | return VK_IMAGE_VIEW_TYPE_2D; | ||
| 250 | case VideoCommon::ImageViewType::Cube: | ||
| 251 | return VK_IMAGE_VIEW_TYPE_CUBE; | ||
| 252 | case VideoCommon::ImageViewType::e3D: | ||
| 253 | return VK_IMAGE_VIEW_TYPE_3D; | ||
| 254 | case VideoCommon::ImageViewType::e1DArray: | ||
| 255 | return VK_IMAGE_VIEW_TYPE_1D_ARRAY; | ||
| 256 | case VideoCommon::ImageViewType::e2DArray: | ||
| 257 | return VK_IMAGE_VIEW_TYPE_2D_ARRAY; | ||
| 258 | case VideoCommon::ImageViewType::CubeArray: | ||
| 259 | return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; | ||
| 260 | case VideoCommon::ImageViewType::Rect: | ||
| 261 | LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); | ||
| 262 | return VK_IMAGE_VIEW_TYPE_2D; | ||
| 263 | case VideoCommon::ImageViewType::Buffer: | ||
| 264 | UNREACHABLE_MSG("Texture buffers can't be image views"); | ||
| 265 | return VK_IMAGE_VIEW_TYPE_1D; | ||
| 239 | } | 266 | } |
| 267 | UNREACHABLE_MSG("Invalid image view type={}", type); | ||
| 268 | return VK_IMAGE_VIEW_TYPE_2D; | ||
| 269 | } | ||
| 240 | 270 | ||
| 241 | // We can't copy images to buffers inside a renderpass | 271 | [[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers( |
| 242 | scheduler.RequestOutsideRenderPassOperationContext(); | 272 | VideoCommon::SubresourceLayers subresource, VkImageAspectFlags aspect_mask) { |
| 273 | return VkImageSubresourceLayers{ | ||
| 274 | .aspectMask = aspect_mask, | ||
| 275 | .mipLevel = static_cast<u32>(subresource.base_level), | ||
| 276 | .baseArrayLayer = static_cast<u32>(subresource.base_layer), | ||
| 277 | .layerCount = static_cast<u32>(subresource.num_layers), | ||
| 278 | }; | ||
| 279 | } | ||
| 243 | 280 | ||
| 244 | FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, | 281 | [[nodiscard]] VkOffset3D MakeOffset3D(VideoCommon::Offset3D offset3d) { |
| 245 | VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); | 282 | return VkOffset3D{ |
| 283 | .x = offset3d.x, | ||
| 284 | .y = offset3d.y, | ||
| 285 | .z = offset3d.z, | ||
| 286 | }; | ||
| 287 | } | ||
| 246 | 288 | ||
| 247 | const auto& unused_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); | 289 | [[nodiscard]] VkExtent3D MakeExtent3D(VideoCommon::Extent3D extent3d) { |
| 248 | // TODO(Rodrigo): Do this in a single copy | 290 | return VkExtent3D{ |
| 249 | for (u32 level = 0; level < params.num_levels; ++level) { | 291 | .width = static_cast<u32>(extent3d.width), |
| 250 | scheduler.Record([image = *image->GetHandle(), buffer = *unused_buffer.handle, | 292 | .height = static_cast<u32>(extent3d.height), |
| 251 | copy = GetBufferImageCopy(level)](vk::CommandBuffer cmdbuf) { | 293 | .depth = static_cast<u32>(extent3d.depth), |
| 252 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy); | 294 | }; |
| 253 | }); | 295 | } |
| 254 | } | ||
| 255 | scheduler.Finish(); | ||
| 256 | 296 | ||
| 257 | // TODO(Rodrigo): Use an intern buffer for staging buffers and avoid this unnecessary memcpy. | 297 | [[nodiscard]] VkImageCopy MakeImageCopy(const VideoCommon::ImageCopy& copy, |
| 258 | std::memcpy(staging_buffer.data(), unused_buffer.commit->Map(host_memory_size), | 298 | VkImageAspectFlags aspect_mask) noexcept { |
| 259 | host_memory_size); | 299 | return VkImageCopy{ |
| 300 | .srcSubresource = MakeImageSubresourceLayers(copy.src_subresource, aspect_mask), | ||
| 301 | .srcOffset = MakeOffset3D(copy.src_offset), | ||
| 302 | .dstSubresource = MakeImageSubresourceLayers(copy.dst_subresource, aspect_mask), | ||
| 303 | .dstOffset = MakeOffset3D(copy.dst_offset), | ||
| 304 | .extent = MakeExtent3D(copy.extent), | ||
| 305 | }; | ||
| 260 | } | 306 | } |
| 261 | 307 | ||
| 262 | void CachedSurface::DecorateSurfaceName() { | 308 | [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( |
| 263 | // TODO(Rodrigo): Add name decorations | 309 | std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { |
| 310 | std::vector<VkBufferCopy> result(copies.size()); | ||
| 311 | std::ranges::transform( | ||
| 312 | copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { | ||
| 313 | return VkBufferCopy{ | ||
| 314 | .srcOffset = static_cast<VkDeviceSize>(copy.src_offset + buffer_offset), | ||
| 315 | .dstOffset = static_cast<VkDeviceSize>(copy.dst_offset), | ||
| 316 | .size = static_cast<VkDeviceSize>(copy.size), | ||
| 317 | }; | ||
| 318 | }); | ||
| 319 | return result; | ||
| 264 | } | 320 | } |
| 265 | 321 | ||
| 266 | View CachedSurface::CreateView(const ViewParams& view_params) { | 322 | [[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies( |
| 267 | // TODO(Rodrigo): Add name decorations | 323 | std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { |
| 268 | return views[view_params] = std::make_shared<CachedSurfaceView>(device, *this, view_params); | 324 | struct Maker { |
| 325 | VkBufferImageCopy operator()(const BufferImageCopy& copy) const { | ||
| 326 | return VkBufferImageCopy{ | ||
| 327 | .bufferOffset = copy.buffer_offset + buffer_offset, | ||
| 328 | .bufferRowLength = copy.buffer_row_length, | ||
| 329 | .bufferImageHeight = copy.buffer_image_height, | ||
| 330 | .imageSubresource = | ||
| 331 | { | ||
| 332 | .aspectMask = aspect_mask, | ||
| 333 | .mipLevel = static_cast<u32>(copy.image_subresource.base_level), | ||
| 334 | .baseArrayLayer = static_cast<u32>(copy.image_subresource.base_layer), | ||
| 335 | .layerCount = static_cast<u32>(copy.image_subresource.num_layers), | ||
| 336 | }, | ||
| 337 | .imageOffset = | ||
| 338 | { | ||
| 339 | .x = copy.image_offset.x, | ||
| 340 | .y = copy.image_offset.y, | ||
| 341 | .z = copy.image_offset.z, | ||
| 342 | }, | ||
| 343 | .imageExtent = | ||
| 344 | { | ||
| 345 | .width = copy.image_extent.width, | ||
| 346 | .height = copy.image_extent.height, | ||
| 347 | .depth = copy.image_extent.depth, | ||
| 348 | }, | ||
| 349 | }; | ||
| 350 | } | ||
| 351 | size_t buffer_offset; | ||
| 352 | VkImageAspectFlags aspect_mask; | ||
| 353 | }; | ||
| 354 | if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { | ||
| 355 | std::vector<VkBufferImageCopy> result(copies.size() * 2); | ||
| 356 | std::ranges::transform(copies, result.begin(), | ||
| 357 | Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); | ||
| 358 | std::ranges::transform(copies, result.begin() + copies.size(), | ||
| 359 | Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); | ||
| 360 | return result; | ||
| 361 | } else { | ||
| 362 | std::vector<VkBufferImageCopy> result(copies.size()); | ||
| 363 | std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); | ||
| 364 | return result; | ||
| 365 | } | ||
| 269 | } | 366 | } |
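TransformBufferImageCopies doubles the copy list when the aspect mask combines depth and stencil, since a single VkBufferImageCopy may name only one aspect. A minimal standalone sketch of that split (the helper name is hypothetical; it interleaves the two aspects per copy, whereas the code above groups all depth copies before all stencil copies, and either ordering works):

    #include <span>
    #include <vector>
    #include <vulkan/vulkan.h>

    std::vector<VkBufferImageCopy> SplitCombinedAspect(std::span<const VkBufferImageCopy> copies) {
        std::vector<VkBufferImageCopy> result;
        result.reserve(copies.size() * 2);
        for (VkBufferImageCopy copy : copies) { // by value: each copy is emitted twice
            copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
            result.push_back(copy);
            copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
            result.push_back(copy);
        }
        return result;
    }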
| 270 | 367 | ||
| 271 | void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { | 368 | [[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(VkImageAspectFlags aspect_mask, |
| 272 | const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); | 369 | const SubresourceRange& range) { |
| 273 | std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); | 370 | return VkImageSubresourceRange{ |
| 371 | .aspectMask = aspect_mask, | ||
| 372 | .baseMipLevel = static_cast<u32>(range.base.level), | ||
| 373 | .levelCount = static_cast<u32>(range.extent.levels), | ||
| 374 | .baseArrayLayer = static_cast<u32>(range.base.layer), | ||
| 375 | .layerCount = static_cast<u32>(range.extent.layers), | ||
| 376 | }; | ||
| 377 | } | ||
| 274 | 378 | ||
| 275 | scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, | 379 | [[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(const ImageView* image_view) { |
| 276 | size = host_memory_size](vk::CommandBuffer cmdbuf) { | 380 | SubresourceRange range = image_view->range; |
| 277 | VkBufferCopy copy; | 381 | if (True(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { |
| 278 | copy.srcOffset = 0; | 382 | // Slice image views always affect a single layer, but their subresource range corresponds |
| 279 | copy.dstOffset = 0; | 383 | // to the slice. Override the value to affect a single layer. |
| 280 | copy.size = size; | 384 | range.base.layer = 0; |
| 281 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); | 385 | range.extent.layers = 1; |
| 386 | } | ||
| 387 | return MakeSubresourceRange(ImageAspectMask(image_view->format), range); | ||
| 388 | } | ||
| 282 | 389 | ||
| 283 | VkBufferMemoryBarrier barrier; | 390 | [[nodiscard]] VkImageSubresourceLayers MakeSubresourceLayers(const ImageView* image_view) { |
| 284 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | 391 | return VkImageSubresourceLayers{ |
| 285 | barrier.pNext = nullptr; | 392 | .aspectMask = ImageAspectMask(image_view->format), |
| 286 | barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | 393 | .mipLevel = static_cast<u32>(image_view->range.base.level), |
| 287 | barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; | 394 | .baseArrayLayer = static_cast<u32>(image_view->range.base.layer), |
| 288 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // They'll be ignored anyway | 395 | .layerCount = static_cast<u32>(image_view->range.extent.layers), |
| 289 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | 396 | }; |
| 290 | barrier.buffer = dst_buffer; | ||
| 291 | barrier.offset = 0; | ||
| 292 | barrier.size = size; | ||
| 293 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, | ||
| 294 | 0, {}, barrier, {}); | ||
| 295 | }); | ||
| 296 | } | 397 | } |
| 297 | 398 | ||
| 298 | void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { | 399 | [[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) { |
| 299 | const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); | 400 | switch (value) { |
| 300 | std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); | 401 | case SwizzleSource::G: |
| 301 | 402 | return SwizzleSource::R; | |
| 302 | FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, | 403 | default: |
| 303 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); | 404 | return value; |
| 304 | |||
| 305 | for (u32 level = 0; level < params.num_levels; ++level) { | ||
| 306 | const VkBufferImageCopy copy = GetBufferImageCopy(level); | ||
| 307 | if (image->GetAspectMask() == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { | ||
| 308 | scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), | ||
| 309 | copy](vk::CommandBuffer cmdbuf) { | ||
| 310 | std::array<VkBufferImageCopy, 2> copies = {copy, copy}; | ||
| 311 | copies[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; | ||
| 312 | copies[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; | ||
| 313 | cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 314 | copies); | ||
| 315 | }); | ||
| 316 | } else { | ||
| 317 | scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), | ||
| 318 | copy](vk::CommandBuffer cmdbuf) { | ||
| 319 | cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); | ||
| 320 | }); | ||
| 321 | } | ||
| 322 | } | 405 | } |
| 323 | } | 406 | } |
| 324 | 407 | ||
| 325 | VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { | 408 | void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, |
| 326 | return { | 409 | VkImageAspectFlags aspect_mask, bool is_initialized, |
| 327 | .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted), | 410 | std::span<const VkBufferImageCopy> copies) { |
| 328 | .bufferRowLength = 0, | 411 | static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | |
| 329 | .bufferImageHeight = 0, | 412 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | |
| 330 | .imageSubresource = | 413 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; |
| 414 | const VkImageMemoryBarrier read_barrier{ | ||
| 415 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 416 | .pNext = nullptr, | ||
| 417 | .srcAccessMask = ACCESS_FLAGS, | ||
| 418 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 419 | .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, | ||
| 420 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 421 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 422 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 423 | .image = image, | ||
| 424 | .subresourceRange = | ||
| 331 | { | 425 | { |
| 332 | .aspectMask = image->GetAspectMask(), | 426 | .aspectMask = aspect_mask, |
| 333 | .mipLevel = level, | 427 | .baseMipLevel = 0, |
| 428 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 334 | .baseArrayLayer = 0, | 429 | .baseArrayLayer = 0, |
| 335 | .layerCount = static_cast<u32>(params.GetNumLayers()), | 430 | .layerCount = VK_REMAINING_ARRAY_LAYERS, |
| 336 | }, | 431 | }, |
| 337 | .imageOffset = {.x = 0, .y = 0, .z = 0}, | 432 | }; |
| 338 | .imageExtent = | 433 | const VkImageMemoryBarrier write_barrier{ |
| 434 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 435 | .pNext = nullptr, | ||
| 436 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 437 | .dstAccessMask = ACCESS_FLAGS, | ||
| 438 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 439 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 440 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 441 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 442 | .image = image, | ||
| 443 | .subresourceRange = | ||
| 339 | { | 444 | { |
| 340 | .width = params.GetMipWidth(level), | 445 | .aspectMask = aspect_mask, |
| 341 | .height = params.GetMipHeight(level), | 446 | .baseMipLevel = 0, |
| 342 | .depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1U, | 447 | .levelCount = VK_REMAINING_MIP_LEVELS, |
| 448 | .baseArrayLayer = 0, | ||
| 449 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 343 | }, | 450 | }, |
| 344 | }; | 451 | }; |
| 452 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, | ||
| 453 | read_barrier); | ||
| 454 | cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies); | ||
| 455 | // TODO: Move this to another API | ||
| 456 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, | ||
| 457 | write_barrier); | ||
| 345 | } | 458 | } |
| 346 | 459 | ||
| 347 | VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { | 460 | [[nodiscard]] VkImageBlit MakeImageBlit(const std::array<Offset2D, 2>& dst_region, |
| 348 | return {image->GetAspectMask(), 0, params.num_levels, 0, | 461 | const std::array<Offset2D, 2>& src_region, |
| 349 | static_cast<u32>(params.GetNumLayers())}; | 462 | const VkImageSubresourceLayers& dst_layers, |
| 463 | const VkImageSubresourceLayers& src_layers) { | ||
| 464 | return VkImageBlit{ | ||
| 465 | .srcSubresource = src_layers, | ||
| 466 | .srcOffsets = | ||
| 467 | { | ||
| 468 | { | ||
| 469 | .x = src_region[0].x, | ||
| 470 | .y = src_region[0].y, | ||
| 471 | .z = 0, | ||
| 472 | }, | ||
| 473 | { | ||
| 474 | .x = src_region[1].x, | ||
| 475 | .y = src_region[1].y, | ||
| 476 | .z = 1, | ||
| 477 | }, | ||
| 478 | }, | ||
| 479 | .dstSubresource = dst_layers, | ||
| 480 | .dstOffsets = | ||
| 481 | { | ||
| 482 | { | ||
| 483 | .x = dst_region[0].x, | ||
| 484 | .y = dst_region[0].y, | ||
| 485 | .z = 0, | ||
| 486 | }, | ||
| 487 | { | ||
| 488 | .x = dst_region[1].x, | ||
| 489 | .y = dst_region[1].y, | ||
| 490 | .z = 1, | ||
| 491 | }, | ||
| 492 | }, | ||
| 493 | }; | ||
| 350 | } | 494 | } |
| 351 | 495 | ||
| 352 | CachedSurfaceView::CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_, | 496 | [[nodiscard]] VkImageResolve MakeImageResolve(const std::array<Offset2D, 2>& dst_region, |
| 353 | const ViewParams& view_params_) | 497 | const std::array<Offset2D, 2>& src_region, |
| 354 | : ViewBase{view_params_}, surface_params{surface_.GetSurfaceParams()}, | 498 | const VkImageSubresourceLayers& dst_layers, |
| 355 | image{surface_.GetImageHandle()}, buffer_view{surface_.GetBufferViewHandle()}, | 499 | const VkImageSubresourceLayers& src_layers) { |
| 356 | aspect_mask{surface_.GetAspectMask()}, device{device_}, surface{surface_}, | 500 | return VkImageResolve{ |
| 357 | base_level{view_params_.base_level}, num_levels{view_params_.num_levels}, | 501 | .srcSubresource = src_layers, |
| 358 | image_view_type{image ? GetImageViewType(view_params_.target) : VK_IMAGE_VIEW_TYPE_1D} { | 502 | .srcOffset = |
| 359 | if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { | 503 | { |
| 360 | base_layer = 0; | 504 | .x = src_region[0].x, |
| 361 | num_layers = 1; | 505 | .y = src_region[0].y, |
| 362 | base_slice = view_params_.base_layer; | 506 | .z = 0, |
| 363 | num_slices = view_params_.num_layers; | 507 | }, |
| 364 | } else { | 508 | .dstSubresource = dst_layers, |
| 365 | base_layer = view_params_.base_layer; | 509 | .dstOffset = |
| 366 | num_layers = view_params_.num_layers; | 510 | { |
| 367 | } | 511 | .x = dst_region[0].x, |
| 512 | .y = dst_region[0].y, | ||
| 513 | .z = 0, | ||
| 514 | }, | ||
| 515 | .extent = | ||
| 516 | { | ||
| 517 | .width = static_cast<u32>(dst_region[1].x - dst_region[0].x), | ||
| 518 | .height = static_cast<u32>(dst_region[1].y - dst_region[0].y), | ||
| 519 | .depth = 1, | ||
| 520 | }, | ||
| 521 | }; | ||
| 368 | } | 522 | } |
| 369 | 523 | ||
| 370 | CachedSurfaceView::~CachedSurfaceView() = default; | 524 | struct RangedBarrierRange { |
| 371 | 525 | u32 min_mip = std::numeric_limits<u32>::max(); | |
| 372 | VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source, | 526 | u32 max_mip = std::numeric_limits<u32>::min(); |
| 373 | SwizzleSource z_source, SwizzleSource w_source) { | 527 | u32 min_layer = std::numeric_limits<u32>::max(); |
| 374 | const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); | 528 | u32 max_layer = std::numeric_limits<u32>::min(); |
| 375 | if (last_image_view && last_swizzle == new_swizzle) { | 529 | |
| 376 | return last_image_view; | 530 | void AddLayers(const VkImageSubresourceLayers& layers) { |
| 531 | min_mip = std::min(min_mip, layers.mipLevel); | ||
| 532 | max_mip = std::max(max_mip, layers.mipLevel + 1); | ||
| 533 | min_layer = std::min(min_layer, layers.baseArrayLayer); | ||
| 534 | max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount); | ||
| 377 | } | 535 | } |
| 378 | last_swizzle = new_swizzle; | ||
| 379 | 536 | ||
| 380 | const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); | 537 | VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept { |
| 381 | auto& image_view = entry->second; | 538 | return VkImageSubresourceRange{ |
| 382 | if (!is_cache_miss) { | 539 | .aspectMask = aspect_mask, |
| 383 | return last_image_view = *image_view; | 540 | .baseMipLevel = min_mip, |
| 541 | .levelCount = max_mip - min_mip, | ||
| 542 | .baseArrayLayer = min_layer, | ||
| 543 | .layerCount = max_layer - min_layer, | ||
| 544 | }; | ||
| 384 | } | 545 | } |
| 546 | }; | ||
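RangedBarrierRange folds every subresource touched by a batch of copies into one conservative range, letting CopyImage below issue a single image barrier per side instead of one per copy. A sketch of the accumulation step for the destination image (assumes the struct above and the Vulkan headers):

    #include <span>
    #include <vulkan/vulkan.h>

    VkImageSubresourceRange DstBarrierRange(std::span<const VkImageCopy> copies,
                                            VkImageAspectFlags aspect_mask) {
        RangedBarrierRange range;
        for (const VkImageCopy& copy : copies) {
            range.AddLayers(copy.dstSubresource); // widen min/max mip and layer bounds
        }
        return range.SubresourceRange(aspect_mask);
    }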
| 385 | 547 | ||
| 386 | std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), | 548 | } // Anonymous namespace |
| 387 | MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)}; | ||
| 388 | if (surface_params.pixel_format == PixelFormat::A1B5G5R5_UNORM) { | ||
| 389 | // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. | ||
| 390 | std::swap(swizzle[0], swizzle[2]); | ||
| 391 | } | ||
| 392 | 549 | ||
| 393 | // Games can sample depth or stencil values on textures. This is decided by the swizzle value on | 550 | void TextureCacheRuntime::Finish() { |
| 394 | // hardware. To emulate this on Vulkan we specify it in the aspect. | 551 | scheduler.Finish(); |
| 395 | VkImageAspectFlags aspect = aspect_mask; | 552 | } |
| 396 | if (aspect == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { | ||
| 397 | UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); | ||
| 398 | const bool is_first = x_source == SwizzleSource::R; | ||
| 399 | switch (surface_params.pixel_format) { | ||
| 400 | case PixelFormat::D24_UNORM_S8_UINT: | ||
| 401 | case PixelFormat::D32_FLOAT_S8_UINT: | ||
| 402 | aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; | ||
| 403 | break; | ||
| 404 | case PixelFormat::S8_UINT_D24_UNORM: | ||
| 405 | aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; | ||
| 406 | break; | ||
| 407 | default: | ||
| 408 | aspect = VK_IMAGE_ASPECT_DEPTH_BIT; | ||
| 409 | UNIMPLEMENTED(); | ||
| 410 | } | ||
| 411 | 553 | ||
| 412 | // Make sure we sample the first component | 554 | ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { |
| 413 | std::transform( | 555 | const auto& buffer = staging_buffer_pool.GetUnusedBuffer(size, true); |
| 414 | swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) { | 556 | return ImageBufferMap{ |
| 415 | return component == VK_COMPONENT_SWIZZLE_G ? VK_COMPONENT_SWIZZLE_R : component; | 557 | .handle = *buffer.handle, |
| 416 | }); | 558 | .map = buffer.commit->Map(size), |
| 417 | } | 559 | }; |
| 560 | } | ||
| 418 | 561 | ||
| 419 | if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { | 562 | void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| 420 | ASSERT(base_slice == 0); | 563 | const std::array<Offset2D, 2>& dst_region, |
| 421 | ASSERT(num_slices == surface_params.depth); | 564 | const std::array<Offset2D, 2>& src_region, |
| 565 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 566 | Tegra::Engines::Fermi2D::Operation operation) { | ||
| 567 | const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format); | ||
| 568 | const bool is_dst_msaa = dst.Samples() != VK_SAMPLE_COUNT_1_BIT; | ||
| 569 | const bool is_src_msaa = src.Samples() != VK_SAMPLE_COUNT_1_BIT; | ||
| 570 | ASSERT(aspect_mask == ImageAspectMask(dst.format)); | ||
| 571 | if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) { | ||
| 572 | blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter, | ||
| 573 | operation); | ||
| 574 | return; | ||
| 422 | } | 575 | } |
| 423 | 576 | if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { | |
| 424 | image_view = device.GetLogical().CreateImageView({ | 577 | if (!device.IsBlitDepthStencilSupported()) { |
| 425 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | 578 | UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa); |
| 426 | .pNext = nullptr, | 579 | blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(), |
| 427 | .flags = 0, | 580 | dst_region, src_region, filter, operation); |
| 428 | .image = surface.GetImageHandle(), | 581 | return; |
| 429 | .viewType = image_view_type, | 582 | } |
| 430 | .format = surface.GetImage().GetFormat(), | 583 | } |
| 431 | .components = | 584 | ASSERT(src.ImageFormat() == dst.ImageFormat()); |
| 432 | { | 585 | ASSERT(!(is_dst_msaa && !is_src_msaa)); |
| 433 | .r = swizzle[0], | 586 | ASSERT(operation == Fermi2D::Operation::SrcCopy); |
| 434 | .g = swizzle[1], | 587 | |
| 435 | .b = swizzle[2], | 588 | const VkImage dst_image = dst.ImageHandle(); |
| 436 | .a = swizzle[3], | 589 | const VkImage src_image = src.ImageHandle(); |
| 590 | const VkImageSubresourceLayers dst_layers = MakeSubresourceLayers(&dst); | ||
| 591 | const VkImageSubresourceLayers src_layers = MakeSubresourceLayers(&src); | ||
| 592 | const bool is_resolve = is_src_msaa && !is_dst_msaa; | ||
| 593 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 594 | scheduler.Record([filter, dst_region, src_region, dst_image, src_image, dst_layers, src_layers, | ||
| 595 | aspect_mask, is_resolve](vk::CommandBuffer cmdbuf) { | ||
| 596 | const std::array read_barriers{ | ||
| 597 | VkImageMemoryBarrier{ | ||
| 598 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 599 | .pNext = nullptr, | ||
| 600 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | | ||
| 601 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||
| 602 | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 603 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 604 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 605 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 606 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 607 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 608 | .image = src_image, | ||
| 609 | .subresourceRange{ | ||
| 610 | .aspectMask = aspect_mask, | ||
| 611 | .baseMipLevel = 0, | ||
| 612 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 613 | .baseArrayLayer = 0, | ||
| 614 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 615 | }, | ||
| 437 | }, | 616 | }, |
| 438 | .subresourceRange = | 617 | VkImageMemoryBarrier{ |
| 439 | { | 618 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 440 | .aspectMask = aspect, | 619 | .pNext = nullptr, |
| 441 | .baseMipLevel = base_level, | 620 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | |
| 442 | .levelCount = num_levels, | 621 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | |
| 443 | .baseArrayLayer = base_layer, | 622 | VK_ACCESS_TRANSFER_WRITE_BIT, |
| 444 | .layerCount = num_layers, | 623 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 624 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 625 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 626 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 627 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 628 | .image = dst_image, | ||
| 629 | .subresourceRange{ | ||
| 630 | .aspectMask = aspect_mask, | ||
| 631 | .baseMipLevel = 0, | ||
| 632 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 633 | .baseArrayLayer = 0, | ||
| 634 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 635 | }, | ||
| 636 | }, | ||
| 637 | }; | ||
| 638 | VkImageMemoryBarrier write_barrier{ | ||
| 639 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 640 | .pNext = nullptr, | ||
| 641 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 642 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | | ||
| 643 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||
| 644 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||
| 645 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 646 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 647 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 648 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 649 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 650 | .image = dst_image, | ||
| 651 | .subresourceRange{ | ||
| 652 | .aspectMask = aspect_mask, | ||
| 653 | .baseMipLevel = 0, | ||
| 654 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 655 | .baseArrayLayer = 0, | ||
| 656 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 445 | }, | 657 | }, |
| 658 | }; | ||
| 659 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 660 | 0, nullptr, nullptr, read_barriers); | ||
| 661 | if (is_resolve) { | ||
| 662 | cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, | ||
| 663 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 664 | MakeImageResolve(dst_region, src_region, dst_layers, src_layers)); | ||
| 665 | } else { | ||
| 666 | const bool is_linear = filter == Fermi2D::Filter::Bilinear; | ||
| 667 | const VkFilter vk_filter = is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; | ||
| 668 | cmdbuf.BlitImage( | ||
| 669 | src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 670 | MakeImageBlit(dst_region, src_region, dst_layers, src_layers), vk_filter); | ||
| 671 | } | ||
| 672 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 673 | 0, write_barrier); | ||
| 446 | }); | 674 | }); |
| 447 | |||
| 448 | return last_image_view = *image_view; | ||
| 449 | } | 675 | } |
| 450 | 676 | ||
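BlitImage above dispatches on three properties: single-sampled color blits are emulated through blit_image_helper, combined depth-stencil blits fall back to the helper only when the device cannot blit them natively, and the remaining cases record native commands, where a multisampled source with a single-sampled destination becomes a resolve. The final decision as a sketch (the enum and helper are stand-ins, not from this change):

    enum class BlitKind { Resolve, Blit };

    // Mirrors the is_resolve flag above: resolving happens only in the one
    // supported direction, multisampled source to single-sampled destination.
    constexpr BlitKind ClassifyNativeBlit(bool is_src_msaa, bool is_dst_msaa) {
        return (is_src_msaa && !is_dst_msaa) ? BlitKind::Resolve : BlitKind::Blit;
    }
    static_assert(ClassifyNativeBlit(true, false) == BlitKind::Resolve);
    static_assert(ClassifyNativeBlit(false, false) == BlitKind::Blit);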
| 451 | VkImageView CachedSurfaceView::GetAttachment() { | 677 | void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { |
| 452 | if (render_target) { | 678 | switch (dst_view.format) { |
| 453 | return *render_target; | 679 | case PixelFormat::R16_UNORM: |
| 680 | if (src_view.format == PixelFormat::D16_UNORM) { | ||
| 681 | return blit_image_helper.ConvertD16ToR16(dst, src_view); | ||
| 682 | } | ||
| 683 | break; | ||
| 684 | case PixelFormat::R32_FLOAT: | ||
| 685 | if (src_view.format == PixelFormat::D32_FLOAT) { | ||
| 686 | return blit_image_helper.ConvertD32ToR32(dst, src_view); | ||
| 687 | } | ||
| 688 | break; | ||
| 689 | case PixelFormat::D16_UNORM: | ||
| 690 | if (src_view.format == PixelFormat::R16_UNORM) { | ||
| 691 | return blit_image_helper.ConvertR16ToD16(dst, src_view); | ||
| 692 | } | ||
| 693 | break; | ||
| 694 | case PixelFormat::D32_FLOAT: | ||
| 695 | if (src_view.format == PixelFormat::R32_FLOAT) { | ||
| 696 | return blit_image_helper.ConvertR32ToD32(dst, src_view); | ||
| 697 | } | ||
| 698 | break; | ||
| 699 | default: | ||
| 700 | break; | ||
| 454 | } | 701 | } |
| 702 | UNIMPLEMENTED_MSG("Unimplemented format copy from {} to {}", src_view.format, dst_view.format); | ||
| 703 | } | ||
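The four cases above form two symmetric pairs: R16_UNORM↔D16_UNORM and R32_FLOAT↔D32_FLOAT. Purely as a reading aid, the same dispatch could be table-driven; ConversionEntry and CONVERSION_TABLE are hypothetical names, and the BlitImageHelper member signatures are inferred from the calls above rather than taken from its header:

    // Sketch only; the shipped code keeps the explicit switch, which reads
    // just as well and optimizes identically.
    struct ConversionEntry {
        PixelFormat dst;
        PixelFormat src;
        void (BlitImageHelper::*convert)(Framebuffer*, ImageView&);
    };
    static constexpr std::array<ConversionEntry, 4> CONVERSION_TABLE{{
        {PixelFormat::R16_UNORM, PixelFormat::D16_UNORM, &BlitImageHelper::ConvertD16ToR16},
        {PixelFormat::R32_FLOAT, PixelFormat::D32_FLOAT, &BlitImageHelper::ConvertD32ToR32},
        {PixelFormat::D16_UNORM, PixelFormat::R16_UNORM, &BlitImageHelper::ConvertR16ToD16},
        {PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT, &BlitImageHelper::ConvertR32ToD32},
    }};
    for (const ConversionEntry& entry : CONVERSION_TABLE) {
        if (entry.dst == dst_view.format && entry.src == src_view.format) {
            return (blit_image_helper.*entry.convert)(dst, src_view);
        }
    }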
| 455 | 704 | ||
| 456 | VkImageViewCreateInfo ci{ | 705 | void TextureCacheRuntime::CopyImage(Image& dst, Image& src, |
| 457 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | 706 | std::span<const VideoCommon::ImageCopy> copies) { |
| 458 | .pNext = nullptr, | 707 | std::vector<VkImageCopy> vk_copies(copies.size()); |
| 459 | .flags = 0, | 708 | const VkImageAspectFlags aspect_mask = dst.AspectMask(); |
| 460 | .image = surface.GetImageHandle(), | 709 | ASSERT(aspect_mask == src.AspectMask()); |
| 461 | .viewType = VK_IMAGE_VIEW_TYPE_1D, | 710 | |
| 462 | .format = surface.GetImage().GetFormat(), | 711 | std::ranges::transform(copies, vk_copies.begin(), [aspect_mask](const auto& copy) { |
| 463 | .components = | 712 | return MakeImageCopy(copy, aspect_mask); |
| 464 | { | 713 | }); |
| 465 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | 714 | const VkImage dst_image = dst.Handle(); |
| 466 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | 715 | const VkImage src_image = src.Handle(); |
| 467 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | 716 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 468 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | 717 | scheduler.Record([dst_image, src_image, aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { |
| 718 | RangedBarrierRange dst_range; | ||
| 719 | RangedBarrierRange src_range; | ||
| 720 | for (const VkImageCopy& copy : vk_copies) { | ||
| 721 | dst_range.AddLayers(copy.dstSubresource); | ||
| 722 | src_range.AddLayers(copy.srcSubresource); | ||
| 723 | } | ||
| 724 | const std::array read_barriers{ | ||
| 725 | VkImageMemoryBarrier{ | ||
| 726 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 727 | .pNext = nullptr, | ||
| 728 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||
| 729 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||
| 730 | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 731 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 732 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 733 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 734 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 735 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 736 | .image = src_image, | ||
| 737 | .subresourceRange = src_range.SubresourceRange(aspect_mask), | ||
| 469 | }, | 738 | }, |
| 470 | .subresourceRange = | 739 | VkImageMemoryBarrier{ |
| 471 | { | 740 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 472 | .aspectMask = aspect_mask, | 741 | .pNext = nullptr, |
| 473 | .baseMipLevel = base_level, | 742 | .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | |
| 474 | .levelCount = num_levels, | 743 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | |
| 475 | .baseArrayLayer = 0, | 744 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | |
| 476 | .layerCount = 0, | 745 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | |
| 746 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||
| 747 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 748 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 749 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 750 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 751 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 752 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 753 | .image = dst_image, | ||
| 754 | .subresourceRange = dst_range.SubresourceRange(aspect_mask), | ||
| 477 | }, | 755 | }, |
| 478 | }; | 756 | }; |
| 479 | if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { | 757 | const VkImageMemoryBarrier write_barrier{ |
| 480 | ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; | 758 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 481 | ci.subresourceRange.baseArrayLayer = base_slice; | 759 | .pNext = nullptr, |
| 482 | ci.subresourceRange.layerCount = num_slices; | 760 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 761 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | | ||
| 762 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | ||
| 763 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||
| 764 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||
| 765 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||
| 766 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 767 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 768 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 769 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 770 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 771 | .image = dst_image, | ||
| 772 | .subresourceRange = dst_range.SubresourceRange(aspect_mask), | ||
| 773 | }; | ||
| 774 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 775 | 0, {}, {}, read_barriers); | ||
| 776 | cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, | ||
| 777 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies); | ||
| 778 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 779 | 0, write_barrier); | ||
| 780 | }); | ||
| 781 | } | ||
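Note the synchronization shape shared by BlitImage and CopyImage: a pre-barrier from ALL_COMMANDS to TRANSFER that makes earlier writes available and moves the destination into TRANSFER_DST_OPTIMAL, the transfer itself, then a post-barrier from TRANSFER back to ALL_COMMANDS that returns the destination to GENERAL and publishes the result to every later stage. Schematically, restating the calls recorded above:

    // Acquire: flush prior writes, transition dst for the transfer.
    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                           0, {}, {}, read_barriers);
    cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image,
                     VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies);
    // Release: publish the transfer and return dst to GENERAL.
    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                           0, write_barrier);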
| 782 | |||
| 783 | Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, | ||
| 784 | VAddr cpu_addr_) | ||
| 785 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, | ||
| 786 | image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), | ||
| 787 | aspect_mask(ImageAspectMask(info.format)) { | ||
| 788 | if (image) { | ||
| 789 | commit = runtime.memory_manager.Commit(image, false); | ||
| 483 | } else { | 790 | } else { |
| 484 | ci.viewType = image_view_type; | 791 | commit = runtime.memory_manager.Commit(buffer, false); |
| 485 | ci.subresourceRange.baseArrayLayer = base_layer; | 792 | } |
| 486 | ci.subresourceRange.layerCount = num_layers; | 793 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { |
| 794 | flags |= VideoCommon::ImageFlagBits::Converted; | ||
| 795 | } | ||
| 796 | if (runtime.device.HasDebuggingToolAttached()) { | ||
| 797 | if (image) { | ||
| 798 | image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | ||
| 799 | } else { | ||
| 800 | buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | ||
| 801 | } | ||
| 487 | } | 802 | } |
| 488 | render_target = device.GetLogical().CreateImageView(ci); | ||
| 489 | return *render_target; | ||
| 490 | } | 803 | } |
| 491 | 804 | ||
| 492 | VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer_, | 805 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 493 | Tegra::Engines::Maxwell3D& maxwell3d_, | 806 | std::span<const BufferImageCopy> copies) { |
| 494 | Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, | 807 | // TODO: Move this to another API |
| 495 | VKMemoryManager& memory_manager_, VKScheduler& scheduler_, | 808 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 496 | VKStagingBufferPool& staging_pool_) | 809 | std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); |
| 497 | : TextureCache(rasterizer_, maxwell3d_, gpu_memory_, device_.IsOptimalAstcSupported()), | 810 | const VkBuffer src_buffer = map.handle; |
| 498 | device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ | 811 | const VkImage vk_image = *image; |
| 499 | staging_pool_} {} | 812 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; |
| 500 | 813 | const bool is_initialized = std::exchange(initialized, true); | |
| 501 | VKTextureCache::~VKTextureCache() = default; | 814 | scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized, |
| 502 | 815 | vk_copies](vk::CommandBuffer cmdbuf) { | |
| 503 | Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { | 816 | CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies); |
| 504 | return std::make_shared<CachedSurface>(device, memory_manager, scheduler, staging_pool, | 817 | }); |
| 505 | gpu_addr, params); | ||
| 506 | } | 818 | } |
| 507 | 819 | ||
| 508 | void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, | 820 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 509 | const VideoCommon::CopyParams& copy_params) { | 821 | std::span<const VideoCommon::BufferCopy> copies) { |
| 510 | const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; | 822 | // TODO: Move this to another API |
| 511 | const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; | 823 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 512 | UNIMPLEMENTED_IF(src_3d); | 824 | std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); |
| 825 | const VkBuffer src_buffer = map.handle; | ||
| 826 | const VkBuffer dst_buffer = *buffer; | ||
| 827 | scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { | ||
| 828 | // TODO: Barriers | ||
| 829 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); | ||
| 830 | }); | ||
| 831 | } | ||
| 513 | 832 | ||
| 514 | // The texture cache handles depth in OpenGL terms; we have to handle it as subresource and | 833 | void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 515 | // dimension respectively. | 834 | std::span<const BufferImageCopy> copies) { |
| 516 | const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z; | 835 | std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); |
| 517 | const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0; | 836 | scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask, |
| 837 | vk_copies](vk::CommandBuffer cmdbuf) { | ||
| 838 | // TODO: Barriers | ||
| 839 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); | ||
| 840 | }); | ||
| 841 | } | ||
| 518 | 842 | ||
| 519 | const u32 extent_z = dst_3d ? copy_params.depth : 1; | 843 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, |
| 520 | const u32 num_layers = dst_3d ? 1 : copy_params.depth; | 844 | ImageId image_id_, Image& image) |
| 845 | : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, | ||
| 846 | image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount( | ||
| 847 | image.info.num_samples)} { | ||
| 848 | const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); | ||
| 849 | std::array<SwizzleSource, 4> swizzle{ | ||
| 850 | SwizzleSource::R, | ||
| 851 | SwizzleSource::G, | ||
| 852 | SwizzleSource::B, | ||
| 853 | SwizzleSource::A, | ||
| 854 | }; | ||
| 855 | if (!info.IsRenderTarget()) { | ||
| 856 | swizzle = info.Swizzle(); | ||
| 857 | if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) { | ||
| 858 | std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); | ||
| 859 | } | ||
| 860 | } | ||
| 861 | const VkFormat vk_format = | ||
| 862 | MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format; | ||
| 863 | const VkImageViewCreateInfo create_info{ | ||
| 864 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | ||
| 865 | .pNext = nullptr, | ||
| 866 | .flags = 0, | ||
| 867 | .image = image.Handle(), | ||
| 868 | .viewType = VkImageViewType{}, | ||
| 869 | .format = vk_format, | ||
| 870 | .components{ | ||
| 871 | .r = ComponentSwizzle(swizzle[0]), | ||
| 872 | .g = ComponentSwizzle(swizzle[1]), | ||
| 873 | .b = ComponentSwizzle(swizzle[2]), | ||
| 874 | .a = ComponentSwizzle(swizzle[3]), | ||
| 875 | }, | ||
| 876 | .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), | ||
| 877 | }; | ||
| 878 | const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) { | ||
| 879 | VkImageViewCreateInfo ci{create_info}; | ||
| 880 | ci.viewType = ImageViewType(view_type); | ||
| 881 | if (num_layers) { | ||
| 882 | ci.subresourceRange.layerCount = *num_layers; | ||
| 883 | } | ||
| 884 | vk::ImageView handle = device->GetLogical().CreateImageView(ci); | ||
| 885 | if (device->HasDebuggingToolAttached()) { | ||
| 886 | handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); | ||
| 887 | } | ||
| 888 | image_views[static_cast<size_t>(view_type)] = std::move(handle); | ||
| 889 | }; | ||
| 890 | switch (info.type) { | ||
| 891 | case VideoCommon::ImageViewType::e1D: | ||
| 892 | case VideoCommon::ImageViewType::e1DArray: | ||
| 893 | create(VideoCommon::ImageViewType::e1D, 1); | ||
| 894 | create(VideoCommon::ImageViewType::e1DArray, std::nullopt); | ||
| 895 | render_target = Handle(VideoCommon::ImageViewType::e1DArray); | ||
| 896 | break; | ||
| 897 | case VideoCommon::ImageViewType::e2D: | ||
| 898 | case VideoCommon::ImageViewType::e2DArray: | ||
| 899 | create(VideoCommon::ImageViewType::e2D, 1); | ||
| 900 | create(VideoCommon::ImageViewType::e2DArray, std::nullopt); | ||
| 901 | render_target = Handle(VideoCommon::ImageViewType::e2DArray); | ||
| 902 | break; | ||
| 903 | case VideoCommon::ImageViewType::e3D: | ||
| 904 | create(VideoCommon::ImageViewType::e3D, std::nullopt); | ||
| 905 | render_target = Handle(VideoCommon::ImageViewType::e3D); | ||
| 906 | break; | ||
| 907 | case VideoCommon::ImageViewType::Cube: | ||
| 908 | case VideoCommon::ImageViewType::CubeArray: | ||
| 909 | create(VideoCommon::ImageViewType::Cube, 6); | ||
| 910 | create(VideoCommon::ImageViewType::CubeArray, std::nullopt); | ||
| 911 | break; | ||
| 912 | case VideoCommon::ImageViewType::Rect: | ||
| 913 | UNIMPLEMENTED(); | ||
| 914 | break; | ||
| 915 | case VideoCommon::ImageViewType::Buffer: | ||
| 916 | buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ | ||
| 917 | .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, | ||
| 918 | .pNext = nullptr, | ||
| 919 | .flags = 0, | ||
| 920 | .buffer = image.Buffer(), | ||
| 921 | .format = vk_format, | ||
| 922 | .offset = 0, // TODO: Redesign buffer cache to support this | ||
| 923 | .range = image.guest_size_bytes, | ||
| 924 | }); | ||
| 925 | break; | ||
| 926 | } | ||
| 927 | } | ||
| 521 | 928 | ||
| 522 | // We can't copy inside a renderpass | 929 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) |
| 523 | scheduler.RequestOutsideRenderPassOperationContext(); | 930 | : VideoCommon::ImageViewBase{params} {} |
| 524 | 931 | ||
| 525 | src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1, | 932 | VkImageView ImageView::DepthView() { |
| 526 | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, | 933 | if (depth_view) { |
| 527 | VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); | 934 | return *depth_view; |
| 528 | dst_surface->Transition(dst_base_layer, num_layers, copy_params.dest_level, 1, | 935 | } |
| 529 | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, | 936 | depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); |
| 530 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); | 937 | return *depth_view; |
| 938 | } | ||
| 531 | 939 | ||
| 532 | const VkImageCopy copy{ | 940 | VkImageView ImageView::StencilView() { |
| 533 | .srcSubresource = | 941 | if (stencil_view) { |
| 534 | { | 942 | return *stencil_view; |
| 535 | .aspectMask = src_surface->GetAspectMask(), | 943 | } |
| 536 | .mipLevel = copy_params.source_level, | 944 | stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); |
| 537 | .baseArrayLayer = copy_params.source_z, | 945 | return *stencil_view; |
| 538 | .layerCount = num_layers, | 946 | } |
| 539 | }, | ||
| 540 | .srcOffset = | ||
| 541 | { | ||
| 542 | .x = static_cast<s32>(copy_params.source_x), | ||
| 543 | .y = static_cast<s32>(copy_params.source_y), | ||
| 544 | .z = 0, | ||
| 545 | }, | ||
| 546 | .dstSubresource = | ||
| 547 | { | ||
| 548 | .aspectMask = dst_surface->GetAspectMask(), | ||
| 549 | .mipLevel = copy_params.dest_level, | ||
| 550 | .baseArrayLayer = dst_base_layer, | ||
| 551 | .layerCount = num_layers, | ||
| 552 | }, | ||
| 553 | .dstOffset = | ||
| 554 | { | ||
| 555 | .x = static_cast<s32>(copy_params.dest_x), | ||
| 556 | .y = static_cast<s32>(copy_params.dest_y), | ||
| 557 | .z = static_cast<s32>(dst_offset_z), | ||
| 558 | }, | ||
| 559 | .extent = | ||
| 560 | { | ||
| 561 | .width = copy_params.width, | ||
| 562 | .height = copy_params.height, | ||
| 563 | .depth = extent_z, | ||
| 564 | }, | ||
| 565 | }; | ||
| 566 | 947 | ||
| 567 | const VkImage src_image = src_surface->GetImageHandle(); | 948 | vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { |
| 568 | const VkImage dst_image = dst_surface->GetImageHandle(); | 949 | return device->GetLogical().CreateImageView({ |
| 569 | scheduler.Record([src_image, dst_image, copy](vk::CommandBuffer cmdbuf) { | 950 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| 570 | cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, | 951 | .pNext = nullptr, |
| 571 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); | 952 | .flags = 0, |
| 953 | .image = image_handle, | ||
| 954 | .viewType = ImageViewType(type), | ||
| 955 | .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format, | ||
| 956 | .components{ | ||
| 957 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 958 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 959 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 960 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 961 | }, | ||
| 962 | .subresourceRange = MakeSubresourceRange(aspect_mask, range), | ||
| 572 | }); | 963 | }); |
| 573 | } | 964 | } |
| 574 | 965 | ||
| 575 | void VKTextureCache::ImageBlit(View& src_view, View& dst_view, | 966 | Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& tsc) { |
| 576 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 967 | const auto& device = runtime.device; |
| 577 | // We can't blit inside a renderpass | 968 | const bool arbitrary_borders = runtime.device.IsExtCustomBorderColorSupported(); |
| 578 | scheduler.RequestOutsideRenderPassOperationContext(); | 969 | const std::array<float, 4> color = tsc.BorderColor(); |
| 579 | 970 | // TODO: Replace this memcpy with std::bit_cast when C++20 is available | |
| 580 | src_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, | 971 | VkClearColorValue border_color; |
| 581 | VK_ACCESS_TRANSFER_READ_BIT); | 972 | std::memcpy(&border_color, &color, sizeof(color)); |
| 582 | dst_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, | 973 | const VkSamplerCustomBorderColorCreateInfoEXT border_ci{ |
| 583 | VK_ACCESS_TRANSFER_WRITE_BIT); | 974 | .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, |
| 584 | 975 | .pNext = nullptr, | |
| 585 | VkImageBlit blit; | 976 | .customBorderColor = border_color, |
| 586 | blit.srcSubresource = src_view->GetImageSubresourceLayers(); | 977 | .format = VK_FORMAT_UNDEFINED, |
| 587 | blit.srcOffsets[0].x = copy_config.src_rect.left; | 978 | }; |
| 588 | blit.srcOffsets[0].y = copy_config.src_rect.top; | 979 | const void* pnext = nullptr; |
| 589 | blit.srcOffsets[0].z = 0; | 980 | if (arbitrary_borders) { |
| 590 | blit.srcOffsets[1].x = copy_config.src_rect.right; | 981 | pnext = &border_ci; |
| 591 | blit.srcOffsets[1].y = copy_config.src_rect.bottom; | 982 | } |
| 592 | blit.srcOffsets[1].z = 1; | 983 | const VkSamplerReductionModeCreateInfoEXT reduction_ci{ |
| 593 | blit.dstSubresource = dst_view->GetImageSubresourceLayers(); | 984 | .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT, |
| 594 | blit.dstOffsets[0].x = copy_config.dst_rect.left; | 985 | .pNext = pnext, |
| 595 | blit.dstOffsets[0].y = copy_config.dst_rect.top; | 986 | .reductionMode = MaxwellToVK::SamplerReduction(tsc.reduction_filter), |
| 596 | blit.dstOffsets[0].z = 0; | 987 | }; |
| 597 | blit.dstOffsets[1].x = copy_config.dst_rect.right; | 988 | if (runtime.device.IsExtSamplerFilterMinmaxSupported()) { |
| 598 | blit.dstOffsets[1].y = copy_config.dst_rect.bottom; | 989 | pnext = &reduction_ci; |
| 599 | blit.dstOffsets[1].z = 1; | 990 | } else if (reduction_ci.reductionMode != VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT) { |
| 600 | 991 | LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required"); | |
| 601 | const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; | 992 | } |
| 602 | 993 | // Some games have samplers with garbage. Sanitize them here. | |
| 603 | scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, | 994 | const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); |
| 604 | is_linear](vk::CommandBuffer cmdbuf) { | 995 | sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ |
| 605 | cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, | 996 | .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, |
| 606 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit, | 997 | .pNext = pnext, |
| 607 | is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); | 998 | .flags = 0, |
| 999 | .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), | ||
| 1000 | .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), | ||
| 1001 | .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), | ||
| 1002 | .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), | ||
| 1003 | .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), | ||
| 1004 | .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), | ||
| 1005 | .mipLodBias = tsc.LodBias(), | ||
| 1006 | .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE), | ||
| 1007 | .maxAnisotropy = max_anisotropy, | ||
| 1008 | .compareEnable = tsc.depth_compare_enabled, | ||
| 1009 | .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), | ||
| 1010 | .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), | ||
| 1011 | .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), | ||
| 1012 | .borderColor = | ||
| 1013 | arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), | ||
| 1014 | .unnormalizedCoordinates = VK_FALSE, | ||
| 608 | }); | 1015 | }); |
| 609 | } | 1016 | } |
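The memcpy in the constructor above is the pre-C++20 spelling of std::bit_cast, as its TODO notes. Once the toolchains allow C++20's <bit>, the type pun collapses to one expression; a minimal sketch:

    #include <bit>

    // std::array<float, 4> and VkClearColorValue are both 16 bytes and
    // trivially copyable, which is exactly what std::bit_cast requires.
    const auto border_color = std::bit_cast<VkClearColorValue>(tsc.BorderColor());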
| 610 | 1017 | ||
| 611 | void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) { | 1018 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, |
| 612 | // Currently unimplemented. PBO copies should be dropped and we should use a render pass to | 1019 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { |
| 613 | // convert from color to depth and vice versa. | 1020 | std::vector<VkAttachmentDescription> descriptions; |
| 614 | LOG_WARNING(Render_Vulkan, "Unimplemented"); | 1021 | std::vector<VkImageView> attachments; |
| 1022 | RenderPassKey renderpass_key{}; | ||
| 1023 | s32 num_layers = 1; | ||
| 1024 | |||
| 1025 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 1026 | const ImageView* const color_buffer = color_buffers[index]; | ||
| 1027 | if (!color_buffer) { | ||
| 1028 | renderpass_key.color_formats[index] = PixelFormat::Invalid; | ||
| 1029 | continue; | ||
| 1030 | } | ||
| 1031 | descriptions.push_back(AttachmentDescription(runtime.device, color_buffer)); | ||
| 1032 | attachments.push_back(color_buffer->RenderTarget()); | ||
| 1033 | renderpass_key.color_formats[index] = color_buffer->format; | ||
| 1034 | num_layers = std::max(num_layers, color_buffer->range.extent.layers); | ||
| 1035 | images[num_images] = color_buffer->ImageHandle(); | ||
| 1036 | image_ranges[num_images] = MakeSubresourceRange(color_buffer); | ||
| 1037 | samples = color_buffer->Samples(); | ||
| 1038 | ++num_images; | ||
| 1039 | } | ||
| 1040 | const size_t num_colors = attachments.size(); | ||
| 1041 | const VkAttachmentReference* depth_attachment = | ||
| 1042 | depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr; | ||
| 1043 | if (depth_buffer) { | ||
| 1044 | descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer)); | ||
| 1045 | attachments.push_back(depth_buffer->RenderTarget()); | ||
| 1046 | renderpass_key.depth_format = depth_buffer->format; | ||
| 1047 | num_layers = std::max(num_layers, depth_buffer->range.extent.layers); | ||
| 1048 | images[num_images] = depth_buffer->ImageHandle(); | ||
| 1049 | image_ranges[num_images] = MakeSubresourceRange(depth_buffer); | ||
| 1050 | samples = depth_buffer->Samples(); | ||
| 1051 | ++num_images; | ||
| 1052 | } else { | ||
| 1053 | renderpass_key.depth_format = PixelFormat::Invalid; | ||
| 1054 | } | ||
| 1055 | renderpass_key.samples = samples; | ||
| 1056 | |||
| 1057 | const auto& device = runtime.device.GetLogical(); | ||
| 1058 | const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); | ||
| 1059 | if (is_new) { | ||
| 1060 | const VkSubpassDescription subpass{ | ||
| 1061 | .flags = 0, | ||
| 1062 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 1063 | .inputAttachmentCount = 0, | ||
| 1064 | .pInputAttachments = nullptr, | ||
| 1065 | .colorAttachmentCount = static_cast<u32>(num_colors), | ||
| 1066 | .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, | ||
| 1067 | .pResolveAttachments = nullptr, | ||
| 1068 | .pDepthStencilAttachment = depth_attachment, | ||
| 1069 | .preserveAttachmentCount = 0, | ||
| 1070 | .pPreserveAttachments = nullptr, | ||
| 1071 | }; | ||
| 1072 | cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ | ||
| 1073 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, | ||
| 1074 | .pNext = nullptr, | ||
| 1075 | .flags = 0, | ||
| 1076 | .attachmentCount = static_cast<u32>(descriptions.size()), | ||
| 1077 | .pAttachments = descriptions.data(), | ||
| 1078 | .subpassCount = 1, | ||
| 1079 | .pSubpasses = &subpass, | ||
| 1080 | .dependencyCount = 0, | ||
| 1081 | .pDependencies = nullptr, | ||
| 1082 | }); | ||
| 1083 | } | ||
| 1084 | renderpass = *cache_pair->second; | ||
| 1085 | render_area = VkExtent2D{ | ||
| 1086 | .width = key.size.width, | ||
| 1087 | .height = key.size.height, | ||
| 1088 | }; | ||
| 1089 | num_color_buffers = static_cast<u32>(num_colors); | ||
| 1090 | framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ | ||
| 1091 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, | ||
| 1092 | .pNext = nullptr, | ||
| 1093 | .flags = 0, | ||
| 1094 | .renderPass = renderpass, | ||
| 1095 | .attachmentCount = static_cast<u32>(attachments.size()), | ||
| 1096 | .pAttachments = attachments.data(), | ||
| 1097 | .width = key.size.width, | ||
| 1098 | .height = key.size.height, | ||
| 1099 | .layers = static_cast<u32>(num_layers), | ||
| 1100 | }); | ||
| 1101 | if (runtime.device.HasDebuggingToolAttached()) { | ||
| 1102 | framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); | ||
| 1103 | } | ||
| 615 | } | 1104 | } |
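The constructor indexes a file-scope ATTACHMENT_REFERENCES array whose definition falls outside this hunk. Purely as a reading aid, a plausible reconstruction (the real definition may differ): one VkAttachmentReference per attachment slot, all in GENERAL layout so color and depth can share the single subpass:

    // Hypothetical reconstruction; not shown in this change.
    constexpr std::array<VkAttachmentReference, 9> ATTACHMENT_REFERENCES = [] {
        std::array<VkAttachmentReference, 9> refs{}; // NUM_RT colors + 1 depth
        for (u32 index = 0; index < refs.size(); ++index) {
            refs[index] = {.attachment = index, .layout = VK_IMAGE_LAYOUT_GENERAL};
        }
        return refs;
    }();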
| 616 | 1105 | ||
| 617 | } // namespace Vulkan | 1106 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index b0be4cb0f..edc3d80c0 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -4,217 +4,265 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <compare> |
| 8 | #include <unordered_map> | 8 | #include <span> |
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_image.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 10 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 13 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 14 | #include "video_core/renderer_vulkan/wrapper.h" | 11 | #include "video_core/renderer_vulkan/wrapper.h" |
| 15 | #include "video_core/texture_cache/surface_base.h" | ||
| 16 | #include "video_core/texture_cache/texture_cache.h" | 12 | #include "video_core/texture_cache/texture_cache.h" |
| 17 | 13 | ||
| 18 | namespace VideoCore { | ||
| 19 | class RasterizerInterface; | ||
| 20 | } | ||
| 21 | |||
| 22 | namespace Vulkan { | 14 | namespace Vulkan { |
| 23 | 15 | ||
| 24 | class RasterizerVulkan; | 16 | using VideoCommon::ImageId; |
| 17 | using VideoCommon::NUM_RT; | ||
| 18 | using VideoCommon::Offset2D; | ||
| 19 | using VideoCommon::RenderTargets; | ||
| 20 | using VideoCore::Surface::PixelFormat; | ||
| 21 | |||
| 25 | class VKDevice; | 22 | class VKDevice; |
| 26 | class VKScheduler; | 23 | class VKScheduler; |
| 27 | class VKStagingBufferPool; | 24 | class VKStagingBufferPool; |
| 28 | 25 | ||
| 29 | class CachedSurfaceView; | 26 | class BlitImageHelper; |
| 30 | class CachedSurface; | 27 | class Image; |
| 28 | class ImageView; | ||
| 29 | class Framebuffer; | ||
| 31 | 30 | ||
| 32 | using Surface = std::shared_ptr<CachedSurface>; | 31 | struct RenderPassKey { |
| 33 | using View = std::shared_ptr<CachedSurfaceView>; | 32 | constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; |
| 34 | using TextureCacheBase = VideoCommon::TextureCache<Surface, View>; | ||
| 35 | 33 | ||
| 36 | using VideoCommon::SurfaceParams; | 34 | std::array<PixelFormat, NUM_RT> color_formats; |
| 37 | using VideoCommon::ViewParams; | 35 | PixelFormat depth_format; |
| 36 | VkSampleCountFlagBits samples; | ||
| 37 | }; | ||
| 38 | 38 | ||
| 39 | class CachedSurface final : public VideoCommon::SurfaceBase<View> { | 39 | } // namespace Vulkan |
| 40 | friend CachedSurfaceView; | ||
| 41 | 40 | ||
| 42 | public: | 41 | namespace std { |
| 43 | explicit CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, | 42 | template <> |
| 44 | VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, | 43 | struct hash<Vulkan::RenderPassKey> { |
| 45 | GPUVAddr gpu_addr_, const SurfaceParams& params_); | 44 | [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { |
| 46 | ~CachedSurface(); | 45 | size_t value = static_cast<size_t>(key.depth_format) << 48; |
| 46 | value ^= static_cast<size_t>(key.samples) << 52; | ||
| 47 | for (size_t i = 0; i < key.color_formats.size(); ++i) { | ||
| 48 | value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); | ||
| 49 | } | ||
| 50 | return value; | ||
| 51 | } | ||
| 52 | }; | ||
| 53 | } // namespace std | ||
| 47 | 54 | ||
| 48 | void UploadTexture(const std::vector<u8>& staging_buffer) override; | 55 | namespace Vulkan { |
| 49 | void DownloadTexture(std::vector<u8>& staging_buffer) override; | ||
| 50 | 56 | ||
| 51 | void FullTransition(VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, | 57 | struct ImageBufferMap { |
| 52 | VkImageLayout new_layout) { | 58 | [[nodiscard]] VkBuffer Handle() const noexcept { |
| 53 | image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels, | 59 | return handle; |
| 54 | new_stage_mask, new_access, new_layout); | ||
| 55 | } | 60 | } |
| 56 | 61 | ||
| 57 | void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | 62 | [[nodiscard]] std::span<u8> Span() const noexcept { |
| 58 | VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, | 63 | return map.Span(); |
| 59 | VkImageLayout new_layout) { | ||
| 60 | image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, | ||
| 61 | new_access, new_layout); | ||
| 62 | } | 64 | } |
| 63 | 65 | ||
| 64 | VKImage& GetImage() { | 66 | VkBuffer handle; |
| 65 | return *image; | 67 | MemoryMap map; |
| 66 | } | 68 | }; |
| 67 | 69 | ||
| 68 | const VKImage& GetImage() const { | 70 | struct TextureCacheRuntime { |
| 69 | return *image; | 71 | const VKDevice& device; |
| 70 | } | 72 | VKScheduler& scheduler; |
| 73 | VKMemoryManager& memory_manager; | ||
| 74 | VKStagingBufferPool& staging_buffer_pool; | ||
| 75 | BlitImageHelper& blit_image_helper; | ||
| 76 | std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache; | ||
| 77 | |||
| 78 | void Finish(); | ||
| 71 | 79 | ||
| 72 | VkImage GetImageHandle() const { | 80 | [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); |
| 73 | return *image->GetHandle(); | 81 | |
| 82 | [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size) { | ||
| 83 | // TODO: Have a special function for this | ||
| 84 | return MapUploadBuffer(size); | ||
| 74 | } | 85 | } |
| 75 | 86 | ||
| 76 | VkImageAspectFlags GetAspectMask() const { | 87 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| 77 | return image->GetAspectMask(); | 88 | const std::array<Offset2D, 2>& dst_region, |
| 89 | const std::array<Offset2D, 2>& src_region, | ||
| 90 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 91 | Tegra::Engines::Fermi2D::Operation operation); | ||
| 92 | |||
| 93 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||
| 94 | |||
| 95 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); | ||
| 96 | |||
| 97 | [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { | ||
| 98 | return false; | ||
| 78 | } | 99 | } |
| 79 | 100 | ||
| 80 | VkBufferView GetBufferViewHandle() const { | 101 | void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t, |
| 81 | return *buffer_view; | 102 | std::span<const VideoCommon::SwizzleParameters>) { |
| 103 | UNREACHABLE(); | ||
| 82 | } | 104 | } |
| 83 | 105 | ||
| 84 | protected: | 106 | void InsertUploadMemoryBarrier() {} |
| 85 | void DecorateSurfaceName() override; | 107 | }; |
| 86 | 108 | ||
| 87 | View CreateView(const ViewParams& view_params) override; | 109 | class Image : public VideoCommon::ImageBase { |
| 110 | public: | ||
| 111 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, | ||
| 112 | VAddr cpu_addr); | ||
| 88 | 113 | ||
| 89 | private: | 114 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 90 | void UploadBuffer(const std::vector<u8>& staging_buffer); | 115 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 91 | 116 | ||
| 92 | void UploadImage(const std::vector<u8>& staging_buffer); | 117 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 118 | std::span<const VideoCommon::BufferCopy> copies); | ||
| 93 | 119 | ||
| 94 | VkBufferImageCopy GetBufferImageCopy(u32 level) const; | 120 | void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 121 | std::span<const VideoCommon::BufferImageCopy> copies); | ||
| 95 | 122 | ||
| 96 | VkImageSubresourceRange GetImageSubresourceRange() const; | 123 | [[nodiscard]] VkImage Handle() const noexcept { |
| 124 | return *image; | ||
| 125 | } | ||
| 97 | 126 | ||
| 98 | const VKDevice& device; | 127 | [[nodiscard]] VkBuffer Buffer() const noexcept { |
| 99 | VKMemoryManager& memory_manager; | 128 | return *buffer; |
| 100 | VKScheduler& scheduler; | 129 | } |
| 101 | VKStagingBufferPool& staging_pool; | 130 | |
| 131 | [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { | ||
| 132 | return aspect_mask; | ||
| 133 | } | ||
| 102 | 134 | ||
| 103 | std::optional<VKImage> image; | 135 | private: |
| 136 | VKScheduler* scheduler; | ||
| 137 | vk::Image image; | ||
| 104 | vk::Buffer buffer; | 138 | vk::Buffer buffer; |
| 105 | vk::BufferView buffer_view; | ||
| 106 | VKMemoryCommit commit; | 139 | VKMemoryCommit commit; |
| 107 | 140 | VkImageAspectFlags aspect_mask = 0; | |
| 108 | VkFormat format = VK_FORMAT_UNDEFINED; | 141 | bool initialized = false; |
| 109 | }; | 142 | }; |
| 110 | 143 | ||
| 111 | class CachedSurfaceView final : public VideoCommon::ViewBase { | 144 | class ImageView : public VideoCommon::ImageViewBase { |
| 112 | public: | 145 | public: |
| 113 | explicit CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_, | 146 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); |
| 114 | const ViewParams& view_params_); | 147 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); |
| 115 | ~CachedSurfaceView(); | ||
| 116 | 148 | ||
| 117 | VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source, | 149 | [[nodiscard]] VkImageView DepthView(); |
| 118 | Tegra::Texture::SwizzleSource y_source, | ||
| 119 | Tegra::Texture::SwizzleSource z_source, | ||
| 120 | Tegra::Texture::SwizzleSource w_source); | ||
| 121 | 150 | ||
| 122 | VkImageView GetAttachment(); | 151 | [[nodiscard]] VkImageView StencilView(); |
| 123 | 152 | ||
| 124 | bool IsSameSurface(const CachedSurfaceView& rhs) const { | 153 | [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { |
| 125 | return &surface == &rhs.surface; | 154 | return *image_views[static_cast<size_t>(query_type)]; |
| 126 | } | 155 | } |
| 127 | 156 | ||
| 128 | u32 GetWidth() const { | 157 | [[nodiscard]] VkBufferView BufferView() const noexcept { |
| 129 | return surface_params.GetMipWidth(base_level); | 158 | return *buffer_view; |
| 130 | } | 159 | } |
| 131 | 160 | ||
| 132 | u32 GetHeight() const { | 161 | [[nodiscard]] VkImage ImageHandle() const noexcept { |
| 133 | return surface_params.GetMipHeight(base_level); | 162 | return image_handle; |
| 134 | } | 163 | } |
| 135 | 164 | ||
| 136 | u32 GetNumLayers() const { | 165 | [[nodiscard]] VkImageView RenderTarget() const noexcept { |
| 137 | return num_layers; | 166 | return render_target; |
| 138 | } | 167 | } |
| 139 | 168 | ||
| 140 | bool IsBufferView() const { | 169 | [[nodiscard]] PixelFormat ImageFormat() const noexcept { |
| 141 | return buffer_view; | 170 | return image_format; |
| 142 | } | 171 | } |
| 143 | 172 | ||
| 144 | VkImage GetImage() const { | 173 | [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { |
| 145 | return image; | 174 | return samples; |
| 146 | } | 175 | } |
| 147 | 176 | ||
| 148 | VkBufferView GetBufferView() const { | 177 | private: |
| 149 | return buffer_view; | 178 | [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); |
| 150 | } | ||
| 151 | 179 | ||
| 152 | VkImageSubresourceRange GetImageSubresourceRange() const { | 180 | const VKDevice* device = nullptr; |
| 153 | return {aspect_mask, base_level, num_levels, base_layer, num_layers}; | 181 | std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views; |
| 154 | } | 182 | vk::ImageView depth_view; |
| 183 | vk::ImageView stencil_view; | ||
| 184 | vk::BufferView buffer_view; | ||
| 185 | VkImage image_handle = VK_NULL_HANDLE; | ||
| 186 | VkImageView render_target = VK_NULL_HANDLE; | ||
| 187 | PixelFormat image_format = PixelFormat::Invalid; | ||
| 188 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; | ||
| 189 | }; | ||
| 155 | 190 | ||
| 156 | VkImageSubresourceLayers GetImageSubresourceLayers() const { | 191 | class ImageAlloc : public VideoCommon::ImageAllocBase {}; |
| 157 | return {surface.GetAspectMask(), base_level, base_layer, num_layers}; | ||
| 158 | } | ||
| 159 | 192 | ||
| 160 | void Transition(VkImageLayout new_layout, VkPipelineStageFlags new_stage_mask, | 193 | class Sampler { |
| 161 | VkAccessFlags new_access) const { | 194 | public: |
| 162 | surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, | 195 | explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); |
| 163 | new_access, new_layout); | ||
| 164 | } | ||
| 165 | 196 | ||
| 166 | void MarkAsModified(u64 tick) { | 197 | [[nodiscard]] VkSampler Handle() const noexcept { |
| 167 | surface.MarkAsModified(true, tick); | 198 | return *sampler; |
| 168 | } | 199 | } |
| 169 | 200 | ||
| 170 | private: | 201 | private: |
| 171 | // Store a copy of these values to avoid double dereference when reading them | 202 | vk::Sampler sampler; |
| 172 | const SurfaceParams surface_params; | ||
| 173 | const VkImage image; | ||
| 174 | const VkBufferView buffer_view; | ||
| 175 | const VkImageAspectFlags aspect_mask; | ||
| 176 | |||
| 177 | const VKDevice& device; | ||
| 178 | CachedSurface& surface; | ||
| 179 | const u32 base_level; | ||
| 180 | const u32 num_levels; | ||
| 181 | const VkImageViewType image_view_type; | ||
| 182 | u32 base_layer = 0; | ||
| 183 | u32 num_layers = 0; | ||
| 184 | u32 base_slice = 0; | ||
| 185 | u32 num_slices = 0; | ||
| 186 | |||
| 187 | VkImageView last_image_view = nullptr; | ||
| 188 | u32 last_swizzle = 0; | ||
| 189 | |||
| 190 | vk::ImageView render_target; | ||
| 191 | std::unordered_map<u32, vk::ImageView> view_cache; | ||
| 192 | }; | 203 | }; |
| 193 | 204 | ||
| 194 | class VKTextureCache final : public TextureCacheBase { | 205 | class Framebuffer { |
| 195 | public: | 206 | public: |
| 196 | explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer_, | 207 | explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers, |
| 197 | Tegra::Engines::Maxwell3D& maxwell3d_, | 208 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key); |
| 198 | Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, | ||
| 199 | VKMemoryManager& memory_manager_, VKScheduler& scheduler_, | ||
| 200 | VKStagingBufferPool& staging_pool_); | ||
| 201 | ~VKTextureCache(); | ||
| 202 | 209 | ||
| 203 | private: | 210 | [[nodiscard]] VkFramebuffer Handle() const noexcept { |
| 204 | Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; | 211 | return *framebuffer; |
| 212 | } | ||
| 205 | 213 | ||
| 206 | void ImageCopy(Surface& src_surface, Surface& dst_surface, | 214 | [[nodiscard]] VkRenderPass RenderPass() const noexcept { |
| 207 | const VideoCommon::CopyParams& copy_params) override; | 215 | return renderpass; |
| 216 | } | ||
| 208 | 217 | ||
| 209 | void ImageBlit(View& src_view, View& dst_view, | 218 | [[nodiscard]] VkExtent2D RenderArea() const noexcept { |
| 210 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 219 | return render_area; |
| 220 | } | ||
| 211 | 221 | ||
| 212 | void BufferCopy(Surface& src_surface, Surface& dst_surface) override; | 222 | [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { |
| 223 | return samples; | ||
| 224 | } | ||
| 213 | 225 | ||
| 214 | const VKDevice& device; | 226 | [[nodiscard]] u32 NumColorBuffers() const noexcept { |
| 215 | VKMemoryManager& memory_manager; | 227 | return num_color_buffers; |
| 216 | VKScheduler& scheduler; | 228 | } |
| 217 | VKStagingBufferPool& staging_pool; | 229 | |
| 230 | [[nodiscard]] u32 NumImages() const noexcept { | ||
| 231 | return num_images; | ||
| 232 | } | ||
| 233 | |||
| 234 | [[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept { | ||
| 235 | return images; | ||
| 236 | } | ||
| 237 | |||
| 238 | [[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept { | ||
| 239 | return image_ranges; | ||
| 240 | } | ||
| 241 | |||
| 242 | private: | ||
| 243 | vk::Framebuffer framebuffer; | ||
| 244 | VkRenderPass renderpass{}; | ||
| 245 | VkExtent2D render_area{}; | ||
| 246 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; | ||
| 247 | u32 num_color_buffers = 0; | ||
| 248 | u32 num_images = 0; | ||
| 249 | std::array<VkImage, 9> images{}; | ||
| 250 | std::array<VkImageSubresourceRange, 9> image_ranges{}; | ||
| 251 | }; | ||
| 252 | |||
| 253 | struct TextureCacheParams { | ||
| 254 | static constexpr bool ENABLE_VALIDATION = true; | ||
| 255 | static constexpr bool FRAMEBUFFER_BLITS = false; | ||
| 256 | static constexpr bool HAS_EMULATED_COPIES = false; | ||
| 257 | |||
| 258 | using Runtime = Vulkan::TextureCacheRuntime; | ||
| 259 | using Image = Vulkan::Image; | ||
| 260 | using ImageAlloc = Vulkan::ImageAlloc; | ||
| 261 | using ImageView = Vulkan::ImageView; | ||
| 262 | using Sampler = Vulkan::Sampler; | ||
| 263 | using Framebuffer = Vulkan::Framebuffer; | ||
| 218 | }; | 264 | }; |
| 219 | 265 | ||
| 266 | using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; | ||
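TextureCacheParams is a traits struct: the shared VideoCommon::TextureCache template pulls the backend's concrete types and feature toggles from it at compile time, so no virtual dispatch is involved. A minimal sketch of the pattern, with GenericCache standing in for the shared template (hypothetical, for illustration only):

    #include <vector>

    template <class Params>
    class GenericCache {
    public:
        explicit GenericCache(typename Params::Runtime& runtime_) : runtime{runtime_} {}

        void Validate() {
            if constexpr (Params::ENABLE_VALIDATION) {
                // Validation-only work compiles away entirely when disabled.
            }
        }

    private:
        typename Params::Runtime& runtime;
        std::vector<typename Params::Image> slots; // concrete type, no vtable
    };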
| 267 | |||
| 220 | } // namespace Vulkan | 268 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index f7e3c9821..f098a8540 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h | |||
| @@ -40,30 +40,34 @@ public: | |||
| 40 | 40 | ||
| 41 | void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); | 41 | void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); |
| 42 | 42 | ||
| 43 | void AddSampledImage(VkSampler sampler, VkImageView image_view) { | 43 | void AddSampledImage(VkImageView image_view, VkSampler sampler) { |
| 44 | payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); | 44 | payload.emplace_back(VkDescriptorImageInfo{ |
| 45 | .sampler = sampler, | ||
| 46 | .imageView = image_view, | ||
| 47 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 48 | }); | ||
| 45 | } | 49 | } |
| 46 | 50 | ||
| 47 | void AddImage(VkImageView image_view) { | 51 | void AddImage(VkImageView image_view) { |
| 48 | payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); | 52 | payload.emplace_back(VkDescriptorImageInfo{ |
| 53 | .sampler = VK_NULL_HANDLE, | ||
| 54 | .imageView = image_view, | ||
| 55 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 56 | }); | ||
| 49 | } | 57 | } |
| 50 | 58 | ||
| 51 | void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { | 59 | void AddBuffer(VkBuffer buffer, u64 offset, size_t size) { |
| 52 | payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); | 60 | payload.emplace_back(VkDescriptorBufferInfo{ |
| 61 | .buffer = buffer, | ||
| 62 | .offset = offset, | ||
| 63 | .range = size, | ||
| 64 | }); | ||
| 53 | } | 65 | } |
| 54 | 66 | ||
| 55 | void AddTexelBuffer(VkBufferView texel_buffer) { | 67 | void AddTexelBuffer(VkBufferView texel_buffer) { |
| 56 | payload.emplace_back(texel_buffer); | 68 | payload.emplace_back(texel_buffer); |
| 57 | } | 69 | } |
| 58 | 70 | ||
| 59 | VkImageLayout* LastImageLayout() { | ||
| 60 | return &payload.back().image.imageLayout; | ||
| 61 | } | ||
| 62 | |||
| 63 | const VkImageLayout* LastImageLayout() const { | ||
| 64 | return &payload.back().image.imageLayout; | ||
| 65 | } | ||
| 66 | |||
| 67 | private: | 71 | private: |
| 68 | const VKDevice& device; | 72 | const VKDevice& device; |
| 69 | VKScheduler& scheduler; | 73 | VKScheduler& scheduler; |
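With the new texture cache keeping images in VK_IMAGE_LAYOUT_GENERAL, the layout is baked into the payload entries above and the removed LastImageLayout() patch-up hook is no longer needed. A call-site sketch (the old pattern is reconstructed from the removed accessor, not quoted from the rasterizer):

    // New: layout is implicit; arguments follow VkDescriptorImageInfo order.
    update_descriptor_queue.AddSampledImage(image_view, sampler);

    // Old: the sampler came first and the caller patched the layout afterwards.
    // update_descriptor_queue.AddSampledImage(sampler, image_view);
    // *update_descriptor_queue.LastImageLayout() = VK_IMAGE_LAYOUT_GENERAL;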
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 1eced809e..2a21e850d 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp | |||
| @@ -81,6 +81,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 81 | X(vkCmdBeginQuery); | 81 | X(vkCmdBeginQuery); |
| 82 | X(vkCmdBeginRenderPass); | 82 | X(vkCmdBeginRenderPass); |
| 83 | X(vkCmdBeginTransformFeedbackEXT); | 83 | X(vkCmdBeginTransformFeedbackEXT); |
| 84 | X(vkCmdBeginDebugUtilsLabelEXT); | ||
| 84 | X(vkCmdBindDescriptorSets); | 85 | X(vkCmdBindDescriptorSets); |
| 85 | X(vkCmdBindIndexBuffer); | 86 | X(vkCmdBindIndexBuffer); |
| 86 | X(vkCmdBindPipeline); | 87 | X(vkCmdBindPipeline); |
| @@ -98,6 +99,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 98 | X(vkCmdEndQuery); | 99 | X(vkCmdEndQuery); |
| 99 | X(vkCmdEndRenderPass); | 100 | X(vkCmdEndRenderPass); |
| 100 | X(vkCmdEndTransformFeedbackEXT); | 101 | X(vkCmdEndTransformFeedbackEXT); |
| 102 | X(vkCmdEndDebugUtilsLabelEXT); | ||
| 101 | X(vkCmdFillBuffer); | 103 | X(vkCmdFillBuffer); |
| 102 | X(vkCmdPipelineBarrier); | 104 | X(vkCmdPipelineBarrier); |
| 103 | X(vkCmdPushConstants); | 105 | X(vkCmdPushConstants); |
| @@ -121,6 +123,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 121 | X(vkCmdSetPrimitiveTopologyEXT); | 123 | X(vkCmdSetPrimitiveTopologyEXT); |
| 122 | X(vkCmdSetStencilOpEXT); | 124 | X(vkCmdSetStencilOpEXT); |
| 123 | X(vkCmdSetStencilTestEnableEXT); | 125 | X(vkCmdSetStencilTestEnableEXT); |
| 126 | X(vkCmdResolveImage); | ||
| 124 | X(vkCreateBuffer); | 127 | X(vkCreateBuffer); |
| 125 | X(vkCreateBufferView); | 128 | X(vkCreateBufferView); |
| 126 | X(vkCreateCommandPool); | 129 | X(vkCreateCommandPool); |
| @@ -176,6 +179,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 176 | X(vkQueueSubmit); | 179 | X(vkQueueSubmit); |
| 177 | X(vkResetFences); | 180 | X(vkResetFences); |
| 178 | X(vkResetQueryPoolEXT); | 181 | X(vkResetQueryPoolEXT); |
| 182 | X(vkSetDebugUtilsObjectNameEXT); | ||
| 183 | X(vkSetDebugUtilsObjectTagEXT); | ||
| 179 | X(vkUnmapMemory); | 184 | X(vkUnmapMemory); |
| 180 | X(vkUpdateDescriptorSetWithTemplateKHR); | 185 | X(vkUpdateDescriptorSetWithTemplateKHR); |
| 181 | X(vkUpdateDescriptorSets); | 186 | X(vkUpdateDescriptorSets); |
| @@ -184,6 +189,19 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 184 | #undef X | 189 | #undef X |
| 185 | } | 190 | } |
| 186 | 191 | ||
| 192 | template <typename T> | ||
| 193 | void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjectType type, | ||
| 194 | const char* name) { | ||
| 195 | const VkDebugUtilsObjectNameInfoEXT name_info{ | ||
| 196 | .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, | ||
| 197 | .pNext = nullptr, | ||
| 198 | .objectType = type, | ||
| 199 | .objectHandle = reinterpret_cast<u64>(handle), | ||
| 200 | .pObjectName = name, | ||
| 201 | }; | ||
| 202 | Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info)); | ||
| 203 | } | ||
| 204 | |||
| 187 | } // Anonymous namespace | 205 | } // Anonymous namespace |
| 188 | 206 | ||
| 189 | bool Load(InstanceDispatch& dld) noexcept { | 207 | bool Load(InstanceDispatch& dld) noexcept { |
| @@ -476,8 +494,7 @@ DebugCallback Instance::TryCreateDebugCallback( | |||
| 476 | VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | | 494 | VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | |
| 477 | VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, | 495 | VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, |
| 478 | .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | | 496 | .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | |
| 479 | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | | 497 | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT, |
| 480 | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, | ||
| 481 | .pfnUserCallback = callback, | 498 | .pfnUserCallback = callback, |
| 482 | .pUserData = nullptr, | 499 | .pUserData = nullptr, |
| 483 | }; | 500 | }; |
| @@ -493,10 +510,38 @@ void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { | |||
| 493 | Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); | 510 | Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); |
| 494 | } | 511 | } |
| 495 | 512 | ||
| 513 | void Buffer::SetObjectNameEXT(const char* name) const { | ||
| 514 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name); | ||
| 515 | } | ||
| 516 | |||
| 517 | void BufferView::SetObjectNameEXT(const char* name) const { | ||
| 518 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name); | ||
| 519 | } | ||
| 520 | |||
| 496 | void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { | 521 | void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { |
| 497 | Check(dld->vkBindImageMemory(owner, handle, memory, offset)); | 522 | Check(dld->vkBindImageMemory(owner, handle, memory, offset)); |
| 498 | } | 523 | } |
| 499 | 524 | ||
| 525 | void Image::SetObjectNameEXT(const char* name) const { | ||
| 526 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name); | ||
| 527 | } | ||
| 528 | |||
| 529 | void ImageView::SetObjectNameEXT(const char* name) const { | ||
| 530 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name); | ||
| 531 | } | ||
| 532 | |||
| 533 | void DeviceMemory::SetObjectNameEXT(const char* name) const { | ||
| 534 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name); | ||
| 535 | } | ||
| 536 | |||
| 537 | void Fence::SetObjectNameEXT(const char* name) const { | ||
| 538 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FENCE, name); | ||
| 539 | } | ||
| 540 | |||
| 541 | void Framebuffer::SetObjectNameEXT(const char* name) const { | ||
| 542 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FRAMEBUFFER, name); | ||
| 543 | } | ||
| 544 | |||
| 500 | DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const { | 545 | DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const { |
| 501 | const std::size_t num = ai.descriptorSetCount; | 546 | const std::size_t num = ai.descriptorSetCount; |
| 502 | std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num); | 547 | std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num); |
| @@ -510,6 +555,10 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c | |||
| 510 | } | 555 | } |
| 511 | } | 556 | } |
| 512 | 557 | ||
| 558 | void DescriptorPool::SetObjectNameEXT(const char* name) const { | ||
| 559 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DESCRIPTOR_POOL, name); | ||
| 560 | } | ||
| 561 | |||
| 513 | CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { | 562 | CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { |
| 514 | const VkCommandBufferAllocateInfo ai{ | 563 | const VkCommandBufferAllocateInfo ai{ |
| 515 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, | 564 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, |
| @@ -530,6 +579,10 @@ CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLev | |||
| 530 | } | 579 | } |
| 531 | } | 580 | } |
| 532 | 581 | ||
| 582 | void CommandPool::SetObjectNameEXT(const char* name) const { | ||
| 583 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_COMMAND_POOL, name); | ||
| 584 | } | ||
| 585 | |||
| 533 | std::vector<VkImage> SwapchainKHR::GetImages() const { | 586 | std::vector<VkImage> SwapchainKHR::GetImages() const { |
| 534 | u32 num; | 587 | u32 num; |
| 535 | Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); | 588 | Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); |
| @@ -538,6 +591,18 @@ std::vector<VkImage> SwapchainKHR::GetImages() const { | |||
| 538 | return images; | 591 | return images; |
| 539 | } | 592 | } |
| 540 | 593 | ||
| 594 | void Event::SetObjectNameEXT(const char* name) const { | ||
| 595 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_EVENT, name); | ||
| 596 | } | ||
| 597 | |||
| 598 | void ShaderModule::SetObjectNameEXT(const char* name) const { | ||
| 599 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name); | ||
| 600 | } | ||
| 601 | |||
| 602 | void Semaphore::SetObjectNameEXT(const char* name) const { | ||
| 603 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name); | ||
| 604 | } | ||
| 605 | |||
| 541 | Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, | 606 | Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, |
| 542 | Span<const char*> enabled_extensions, const void* next, | 607 | Span<const char*> enabled_extensions, const void* next, |
| 543 | DeviceDispatch& dispatch) noexcept { | 608 | DeviceDispatch& dispatch) noexcept { |
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 76f790eab..f9a184e00 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <limits> | 9 | #include <limits> |
| 10 | #include <memory> | 10 | #include <memory> |
| 11 | #include <optional> | 11 | #include <optional> |
| 12 | #include <span> | ||
| 12 | #include <type_traits> | 13 | #include <type_traits> |
| 13 | #include <utility> | 14 | #include <utility> |
| 14 | #include <vector> | 15 | #include <vector> |
| @@ -18,6 +19,10 @@ | |||
| 18 | 19 | ||
| 19 | #include "common/common_types.h" | 20 | #include "common/common_types.h" |
| 20 | 21 | ||
| 22 | #ifdef _MSC_VER | ||
| 23 | #pragma warning(disable : 26812) // Disable prefer enum class over enum | ||
| 24 | #endif | ||
| 25 | |||
| 21 | namespace Vulkan::vk { | 26 | namespace Vulkan::vk { |
| 22 | 27 | ||
| 23 | /** | 28 | /** |
| @@ -41,6 +46,9 @@ public: | |||
| 41 | /// Construct an empty span. | 46 | /// Construct an empty span. |
| 42 | constexpr Span() noexcept = default; | 47 | constexpr Span() noexcept = default; |
| 43 | 48 | ||
| 49 | /// Construct an empty span. | ||
| 50 | constexpr Span(std::nullptr_t) noexcept {} | ||
| 51 | |||
| 44 | /// Construct a span from a single element. | 52 | /// Construct a span from a single element. |
| 45 | constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {} | 53 | constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {} |
| 46 | 54 | ||
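The nullptr constructor above lets call sites pass nullptr wherever an empty Span is expected, instead of spelling out a default-constructed Span. A hypothetical call site (WaitSemaphores is an assumed name, not part of the wrapper):

void WaitSemaphores(Span<VkSemaphore> semaphores); // assumed signature

void Example() {
    WaitSemaphores(nullptr); // empty span: data() == nullptr, size() == 0
}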
| @@ -177,6 +185,7 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 177 | PFN_vkCmdBeginQuery vkCmdBeginQuery; | 185 | PFN_vkCmdBeginQuery vkCmdBeginQuery; |
| 178 | PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; | 186 | PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; |
| 179 | PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; | 187 | PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; |
| 188 | PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT; | ||
| 180 | PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; | 189 | PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; |
| 181 | PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; | 190 | PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; |
| 182 | PFN_vkCmdBindPipeline vkCmdBindPipeline; | 191 | PFN_vkCmdBindPipeline vkCmdBindPipeline; |
| @@ -194,6 +203,7 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 194 | PFN_vkCmdEndQuery vkCmdEndQuery; | 203 | PFN_vkCmdEndQuery vkCmdEndQuery; |
| 195 | PFN_vkCmdEndRenderPass vkCmdEndRenderPass; | 204 | PFN_vkCmdEndRenderPass vkCmdEndRenderPass; |
| 196 | PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT; | 205 | PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT; |
| 206 | PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT; | ||
| 197 | PFN_vkCmdFillBuffer vkCmdFillBuffer; | 207 | PFN_vkCmdFillBuffer vkCmdFillBuffer; |
| 198 | PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; | 208 | PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; |
| 199 | PFN_vkCmdPushConstants vkCmdPushConstants; | 209 | PFN_vkCmdPushConstants vkCmdPushConstants; |
| @@ -217,6 +227,7 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 217 | PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; | 227 | PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; |
| 218 | PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; | 228 | PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; |
| 219 | PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; | 229 | PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; |
| 230 | PFN_vkCmdResolveImage vkCmdResolveImage; | ||
| 220 | PFN_vkCreateBuffer vkCreateBuffer; | 231 | PFN_vkCreateBuffer vkCreateBuffer; |
| 221 | PFN_vkCreateBufferView vkCreateBufferView; | 232 | PFN_vkCreateBufferView vkCreateBufferView; |
| 222 | PFN_vkCreateCommandPool vkCreateCommandPool; | 233 | PFN_vkCreateCommandPool vkCreateCommandPool; |
| @@ -272,6 +283,8 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 272 | PFN_vkQueueSubmit vkQueueSubmit; | 283 | PFN_vkQueueSubmit vkQueueSubmit; |
| 273 | PFN_vkResetFences vkResetFences; | 284 | PFN_vkResetFences vkResetFences; |
| 274 | PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; | 285 | PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; |
| 286 | PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT; | ||
| 287 | PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT; | ||
| 275 | PFN_vkUnmapMemory vkUnmapMemory; | 288 | PFN_vkUnmapMemory vkUnmapMemory; |
| 276 | PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; | 289 | PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; |
| 277 | PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; | 290 | PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; |
| @@ -542,18 +555,14 @@ private: | |||
| 542 | const DeviceDispatch* dld = nullptr; | 555 | const DeviceDispatch* dld = nullptr; |
| 543 | }; | 556 | }; |
| 544 | 557 | ||
| 545 | using BufferView = Handle<VkBufferView, VkDevice, DeviceDispatch>; | ||
| 546 | using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>; | 558 | using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>; |
| 547 | using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>; | 559 | using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>; |
| 548 | using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>; | 560 | using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>; |
| 549 | using Framebuffer = Handle<VkFramebuffer, VkDevice, DeviceDispatch>; | ||
| 550 | using ImageView = Handle<VkImageView, VkDevice, DeviceDispatch>; | ||
| 551 | using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>; | 561 | using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>; |
| 552 | using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>; | 562 | using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>; |
| 553 | using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>; | 563 | using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>; |
| 554 | using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>; | 564 | using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>; |
| 555 | using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>; | 565 | using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>; |
| 556 | using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>; | ||
| 557 | using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>; | 566 | using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>; |
| 558 | 567 | ||
| 559 | using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>; | 568 | using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>; |
| @@ -605,6 +614,17 @@ class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> { | |||
| 605 | public: | 614 | public: |
| 606 | /// Attaches a memory allocation. | 615 | /// Attaches a memory allocation. |
| 607 | void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; | 616 | void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; |
| 617 | |||
| 618 | /// Set object name. | ||
| 619 | void SetObjectNameEXT(const char* name) const; | ||
| 620 | }; | ||
| 621 | |||
| 622 | class BufferView : public Handle<VkBufferView, VkDevice, DeviceDispatch> { | ||
| 623 | using Handle<VkBufferView, VkDevice, DeviceDispatch>::Handle; | ||
| 624 | |||
| 625 | public: | ||
| 626 | /// Set object name. | ||
| 627 | void SetObjectNameEXT(const char* name) const; | ||
| 608 | }; | 628 | }; |
| 609 | 629 | ||
| 610 | class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { | 630 | class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { |
| @@ -613,12 +633,26 @@ class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { | |||
| 613 | public: | 633 | public: |
| 614 | /// Attaches a memory allocation. | 634 | /// Attaches a memory allocation. |
| 615 | void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; | 635 | void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; |
| 636 | |||
| 637 | /// Set object name. | ||
| 638 | void SetObjectNameEXT(const char* name) const; | ||
| 639 | }; | ||
| 640 | |||
| 641 | class ImageView : public Handle<VkImageView, VkDevice, DeviceDispatch> { | ||
| 642 | using Handle<VkImageView, VkDevice, DeviceDispatch>::Handle; | ||
| 643 | |||
| 644 | public: | ||
| 645 | /// Set object name. | ||
| 646 | void SetObjectNameEXT(const char* name) const; | ||
| 616 | }; | 647 | }; |
| 617 | 648 | ||
| 618 | class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> { | 649 | class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> { |
| 619 | using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; | 650 | using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; |
| 620 | 651 | ||
| 621 | public: | 652 | public: |
| 653 | /// Set object name. | ||
| 654 | void SetObjectNameEXT(const char* name) const; | ||
| 655 | |||
| 622 | u8* Map(VkDeviceSize offset, VkDeviceSize size) const { | 656 | u8* Map(VkDeviceSize offset, VkDeviceSize size) const { |
| 623 | void* data; | 657 | void* data; |
| 624 | Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); | 658 | Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); |
| @@ -634,6 +668,9 @@ class Fence : public Handle<VkFence, VkDevice, DeviceDispatch> { | |||
| 634 | using Handle<VkFence, VkDevice, DeviceDispatch>::Handle; | 668 | using Handle<VkFence, VkDevice, DeviceDispatch>::Handle; |
| 635 | 669 | ||
| 636 | public: | 670 | public: |
| 671 | /// Set object name. | ||
| 672 | void SetObjectNameEXT(const char* name) const; | ||
| 673 | |||
| 637 | VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept { | 674 | VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept { |
| 638 | return dld->vkWaitForFences(owner, 1, &handle, true, timeout); | 675 | return dld->vkWaitForFences(owner, 1, &handle, true, timeout); |
| 639 | } | 676 | } |
| @@ -647,11 +684,22 @@ public: | |||
| 647 | } | 684 | } |
| 648 | }; | 685 | }; |
| 649 | 686 | ||
| 687 | class Framebuffer : public Handle<VkFramebuffer, VkDevice, DeviceDispatch> { | ||
| 688 | using Handle<VkFramebuffer, VkDevice, DeviceDispatch>::Handle; | ||
| 689 | |||
| 690 | public: | ||
| 691 | /// Set object name. | ||
| 692 | void SetObjectNameEXT(const char* name) const; | ||
| 693 | }; | ||
| 694 | |||
| 650 | class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> { | 695 | class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> { |
| 651 | using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle; | 696 | using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle; |
| 652 | 697 | ||
| 653 | public: | 698 | public: |
| 654 | DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; | 699 | DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; |
| 700 | |||
| 701 | /// Set object name. | ||
| 702 | void SetObjectNameEXT(const char* name) const; | ||
| 655 | }; | 703 | }; |
| 656 | 704 | ||
| 657 | class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> { | 705 | class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> { |
| @@ -660,6 +708,9 @@ class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> { | |||
| 660 | public: | 708 | public: |
| 661 | CommandBuffers Allocate(std::size_t num_buffers, | 709 | CommandBuffers Allocate(std::size_t num_buffers, |
| 662 | VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; | 710 | VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; |
| 711 | |||
| 712 | /// Set object name. | ||
| 713 | void SetObjectNameEXT(const char* name) const; | ||
| 663 | }; | 714 | }; |
| 664 | 715 | ||
| 665 | class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> { | 716 | class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> { |
| @@ -673,15 +724,29 @@ class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> { | |||
| 673 | using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle; | 724 | using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle; |
| 674 | 725 | ||
| 675 | public: | 726 | public: |
| 727 | /// Set object name. | ||
| 728 | void SetObjectNameEXT(const char* name) const; | ||
| 729 | |||
| 676 | VkResult GetStatus() const noexcept { | 730 | VkResult GetStatus() const noexcept { |
| 677 | return dld->vkGetEventStatus(owner, handle); | 731 | return dld->vkGetEventStatus(owner, handle); |
| 678 | } | 732 | } |
| 679 | }; | 733 | }; |
| 680 | 734 | ||
| 735 | class ShaderModule : public Handle<VkShaderModule, VkDevice, DeviceDispatch> { | ||
| 736 | using Handle<VkShaderModule, VkDevice, DeviceDispatch>::Handle; | ||
| 737 | |||
| 738 | public: | ||
| 739 | /// Set object name. | ||
| 740 | void SetObjectNameEXT(const char* name) const; | ||
| 741 | }; | ||
| 742 | |||
| 681 | class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { | 743 | class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { |
| 682 | using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; | 744 | using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; |
| 683 | 745 | ||
| 684 | public: | 746 | public: |
| 747 | /// Set object name. | ||
| 748 | void SetObjectNameEXT(const char* name) const; | ||
| 749 | |||
| 685 | [[nodiscard]] u64 GetCounter() const { | 750 | [[nodiscard]] u64 GetCounter() const { |
| 686 | u64 value; | 751 | u64 value; |
| 687 | Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); | 752 | Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); |
| @@ -932,6 +997,12 @@ public: | |||
| 932 | regions.data(), filter); | 997 | regions.data(), filter); |
| 933 | } | 998 | } |
| 934 | 999 | ||
| 1000 | void ResolveImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, | ||
| 1001 | VkImageLayout dst_layout, Span<VkImageResolve> regions) const noexcept { | ||
| 1002 | dld->vkCmdResolveImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), | ||
| 1003 | regions.data()); | ||
| 1004 | } | ||
| 1005 | |||
| 935 | void Dispatch(u32 x, u32 y, u32 z) const noexcept { | 1006 | void Dispatch(u32 x, u32 y, u32 z) const noexcept { |
| 936 | dld->vkCmdDispatch(handle, x, y, z); | 1007 | dld->vkCmdDispatch(handle, x, y, z); |
| 937 | } | 1008 | } |
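ResolveImage records vkCmdResolveImage, which collapses a multisampled image into a single-sample one. A hedged usage sketch; msaa_image, resolved_image, width, and height are placeholders from the caller's context:

const VkImageResolve region{
    .srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}, // aspect, mip, base layer, layer count
    .srcOffset = {0, 0, 0},
    .dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1},
    .dstOffset = {0, 0, 0},
    .extent = {width, height, 1},
};
cmdbuf.ResolveImage(msaa_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, resolved_image,
                    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, region);

Both images must already be in the given transfer layouts when the command executes.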
| @@ -946,6 +1017,23 @@ public: | |||
| 946 | image_barriers.size(), image_barriers.data()); | 1017 | image_barriers.size(), image_barriers.data()); |
| 947 | } | 1018 | } |
| 948 | 1019 | ||
| 1020 | void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, | ||
| 1021 | VkDependencyFlags dependency_flags = 0) const noexcept { | ||
| 1022 | PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, {}); | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, | ||
| 1026 | VkDependencyFlags dependency_flags, | ||
| 1027 | const VkBufferMemoryBarrier& buffer_barrier) const noexcept { | ||
| 1028 | PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {}); | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, | ||
| 1032 | VkDependencyFlags dependency_flags, | ||
| 1033 | const VkImageMemoryBarrier& image_barrier) const noexcept { | ||
| 1034 | PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, image_barrier); | ||
| 1035 | } | ||
| 1036 | |||
| 949 | void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout, | 1037 | void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout, |
| 950 | Span<VkBufferImageCopy> regions) const noexcept { | 1038 | Span<VkBufferImageCopy> regions) const noexcept { |
| 951 | dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), | 1039 | dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), |
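The three overloads above forward to the general PipelineBarrier, so the common single-barrier cases no longer pass empty spans explicitly. For example, transitioning one image after a transfer write (the image handle and access/stage masks are placeholders):

const VkImageMemoryBarrier barrier{
    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
    .pNext = nullptr,
    .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
    .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
    .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
    .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
    .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
    .image = image,
    .subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0,
                       barrier);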
| @@ -979,6 +1067,13 @@ public: | |||
| 979 | dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); | 1067 | dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); |
| 980 | } | 1068 | } |
| 981 | 1069 | ||
| 1070 | template <typename T> | ||
| 1071 | void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags, | ||
| 1072 | const T& data) const noexcept { | ||
| 1073 | static_assert(std::is_trivially_copyable_v<T>, "<data> is not trivially copyable"); | ||
| 1074 | dld->vkCmdPushConstants(handle, layout, flags, 0, static_cast<u32>(sizeof(T)), &data); | ||
| 1075 | } | ||
| 1076 | |||
| 982 | void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept { | 1077 | void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept { |
| 983 | dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); | 1078 | dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); |
| 984 | } | 1079 | } |
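The templated PushConstants overload pushes any trivially copyable struct at offset 0 without the caller computing sizes by hand. A hypothetical push-constant block (the layout handle and stage flags are assumed from the caller):

struct PushData {
    std::array<float, 4> color;
    u32 flags;
};
static_assert(std::is_trivially_copyable_v<PushData>);

cmdbuf.PushConstants(pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT,
                     PushData{.color = {1.0f, 0.0f, 0.0f, 1.0f}, .flags = 0});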
| @@ -1088,6 +1183,20 @@ public: | |||
| 1088 | counter_buffers, counter_buffer_offsets); | 1183 | counter_buffers, counter_buffer_offsets); |
| 1089 | } | 1184 | } |
| 1090 | 1185 | ||
| 1186 | void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept { | ||
| 1187 | const VkDebugUtilsLabelEXT label_info{ | ||
| 1188 | .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, | ||
| 1189 | .pNext = nullptr, | ||
| 1190 | .pLabelName = label, | ||
| 1191 | .color{color[0], color[1], color[2], color[3]}, | ||
| 1192 | }; | ||
| 1193 | dld->vkCmdBeginDebugUtilsLabelEXT(handle, &label_info); | ||
| 1194 | } | ||
| 1195 | |||
| 1196 | void EndDebugUtilsLabelEXT() const noexcept { | ||
| 1197 | dld->vkCmdEndDebugUtilsLabelEXT(handle); | ||
| 1198 | } | ||
| 1199 | |||
| 1091 | private: | 1200 | private: |
| 1092 | VkCommandBuffer handle; | 1201 | VkCommandBuffer handle; |
| 1093 | const DeviceDispatch* dld; | 1202 | const DeviceDispatch* dld; |
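BeginDebugUtilsLabelEXT and EndDebugUtilsLabelEXT bracket a named, colored region that capture tools display around the enclosed commands. A hypothetical scoped usage:

float color[4] = {0.0f, 1.0f, 0.0f, 1.0f}; // RGBA hint for the tool's UI
cmdbuf.BeginDebugUtilsLabelEXT("Upload staging buffers", color);
// ... record the commands belonging to this region ...
cmdbuf.EndDebugUtilsLabelEXT();

A std::span<float, 4> binds directly to a float[4], which is why <span> is now included in wrapper.h.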
diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp deleted file mode 100644 index 53c7ef12d..000000000 --- a/src/video_core/sampler_cache.cpp +++ /dev/null | |||
| @@ -1,21 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/cityhash.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/sampler_cache.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
| 11 | std::size_t SamplerCacheKey::Hash() const { | ||
| 12 | static_assert(sizeof(raw) % sizeof(u64) == 0); | ||
| 13 | return static_cast<std::size_t>( | ||
| 14 | Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64))); | ||
| 15 | } | ||
| 16 | |||
| 17 | bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const { | ||
| 18 | return raw == rhs.raw; | ||
| 19 | } | ||
| 20 | |||
| 21 | } // namespace VideoCommon | ||
diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h deleted file mode 100644 index cbe3ad071..000000000 --- a/src/video_core/sampler_cache.h +++ /dev/null | |||
| @@ -1,60 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include "video_core/textures/texture.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | struct SamplerCacheKey final : public Tegra::Texture::TSCEntry { | ||
| 15 | std::size_t Hash() const; | ||
| 16 | |||
| 17 | bool operator==(const SamplerCacheKey& rhs) const; | ||
| 18 | |||
| 19 | bool operator!=(const SamplerCacheKey& rhs) const { | ||
| 20 | return !operator==(rhs); | ||
| 21 | } | ||
| 22 | }; | ||
| 23 | |||
| 24 | } // namespace VideoCommon | ||
| 25 | |||
| 26 | namespace std { | ||
| 27 | |||
| 28 | template <> | ||
| 29 | struct hash<VideoCommon::SamplerCacheKey> { | ||
| 30 | std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept { | ||
| 31 | return k.Hash(); | ||
| 32 | } | ||
| 33 | }; | ||
| 34 | |||
| 35 | } // namespace std | ||
| 36 | |||
| 37 | namespace VideoCommon { | ||
| 38 | |||
| 39 | template <typename SamplerType, typename SamplerStorageType> | ||
| 40 | class SamplerCache { | ||
| 41 | public: | ||
| 42 | SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) { | ||
| 43 | const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc}); | ||
| 44 | auto& sampler = entry->second; | ||
| 45 | if (is_cache_miss) { | ||
| 46 | sampler = CreateSampler(tsc); | ||
| 47 | } | ||
| 48 | return ToSamplerType(sampler); | ||
| 49 | } | ||
| 50 | |||
| 51 | protected: | ||
| 52 | virtual SamplerStorageType CreateSampler(const Tegra::Texture::TSCEntry& tsc) const = 0; | ||
| 53 | |||
| 54 | virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0; | ||
| 55 | |||
| 56 | private: | ||
| 57 | std::unordered_map<SamplerCacheKey, SamplerStorageType> cache; | ||
| 58 | }; | ||
| 59 | |||
| 60 | } // namespace VideoCommon \ No newline at end of file | ||
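The deleted SamplerCache keyed samplers with std::unordered_map::try_emplace, where the returned bool is true on a cache miss and the value slot is created in place to be filled lazily. A minimal standalone illustration of that idiom:

#include <string>
#include <unordered_map>

int main() {
    std::unordered_map<int, std::string> cache;
    auto [it, is_cache_miss] = cache.try_emplace(42); // value default-constructed in place
    if (is_cache_miss) {
        it->second = "expensive-to-create value"; // only built once per key
    }
    return 0;
}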
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index 78245473c..09f93463b 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp | |||
| @@ -137,10 +137,9 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, | |||
| 137 | const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, | 137 | const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, |
| 138 | Vulkan::VKDescriptorPool& descriptor_pool, | 138 | Vulkan::VKDescriptorPool& descriptor_pool, |
| 139 | Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, | 139 | Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, |
| 140 | Vulkan::VKRenderPassCache& renderpass_cache, | ||
| 141 | std::vector<VkDescriptorSetLayoutBinding> bindings, | 140 | std::vector<VkDescriptorSetLayoutBinding> bindings, |
| 142 | Vulkan::SPIRVProgram program, | 141 | Vulkan::SPIRVProgram program, |
| 143 | Vulkan::GraphicsPipelineCacheKey key) { | 142 | Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { |
| 144 | std::unique_lock lock(queue_mutex); | 143 | std::unique_lock lock(queue_mutex); |
| 145 | pending_queue.push({ | 144 | pending_queue.push({ |
| 146 | .backend = Backend::Vulkan, | 145 | .backend = Backend::Vulkan, |
| @@ -149,10 +148,10 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, | |||
| 149 | .scheduler = &scheduler, | 148 | .scheduler = &scheduler, |
| 150 | .descriptor_pool = &descriptor_pool, | 149 | .descriptor_pool = &descriptor_pool, |
| 151 | .update_descriptor_queue = &update_descriptor_queue, | 150 | .update_descriptor_queue = &update_descriptor_queue, |
| 152 | .renderpass_cache = &renderpass_cache, | ||
| 153 | .bindings = std::move(bindings), | 151 | .bindings = std::move(bindings), |
| 154 | .program = std::move(program), | 152 | .program = std::move(program), |
| 155 | .key = key, | 153 | .key = key, |
| 154 | .num_color_buffers = num_color_buffers, | ||
| 156 | }); | 155 | }); |
| 157 | cv.notify_one(); | 156 | cv.notify_one(); |
| 158 | } | 157 | } |
| @@ -205,8 +204,8 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context | |||
| 205 | } else if (work.backend == Backend::Vulkan) { | 204 | } else if (work.backend == Backend::Vulkan) { |
| 206 | auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( | 205 | auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( |
| 207 | *work.vk_device, *work.scheduler, *work.descriptor_pool, | 206 | *work.vk_device, *work.scheduler, *work.descriptor_pool, |
| 208 | *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings, | 207 | *work.update_descriptor_queue, work.key, work.bindings, work.program, |
| 209 | work.program); | 208 | work.num_color_buffers); |
| 210 | 209 | ||
| 211 | work.pp_cache->EmplacePipeline(std::move(pipeline)); | 210 | work.pp_cache->EmplacePipeline(std::move(pipeline)); |
| 212 | } | 211 | } |
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 5a7216019..004e214a8 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h | |||
| @@ -98,9 +98,9 @@ public: | |||
| 98 | Vulkan::VKScheduler& scheduler, | 98 | Vulkan::VKScheduler& scheduler, |
| 99 | Vulkan::VKDescriptorPool& descriptor_pool, | 99 | Vulkan::VKDescriptorPool& descriptor_pool, |
| 100 | Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, | 100 | Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, |
| 101 | Vulkan::VKRenderPassCache& renderpass_cache, | ||
| 102 | std::vector<VkDescriptorSetLayoutBinding> bindings, | 101 | std::vector<VkDescriptorSetLayoutBinding> bindings, |
| 103 | Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key); | 102 | Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, |
| 103 | u32 num_color_buffers); | ||
| 104 | 104 | ||
| 105 | private: | 105 | private: |
| 106 | void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); | 106 | void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); |
| @@ -127,10 +127,10 @@ private: | |||
| 127 | Vulkan::VKScheduler* scheduler; | 127 | Vulkan::VKScheduler* scheduler; |
| 128 | Vulkan::VKDescriptorPool* descriptor_pool; | 128 | Vulkan::VKDescriptorPool* descriptor_pool; |
| 129 | Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; | 129 | Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; |
| 130 | Vulkan::VKRenderPassCache* renderpass_cache; | ||
| 131 | std::vector<VkDescriptorSetLayoutBinding> bindings; | 130 | std::vector<VkDescriptorSetLayoutBinding> bindings; |
| 132 | Vulkan::SPIRVProgram program; | 131 | Vulkan::SPIRVProgram program; |
| 133 | Vulkan::GraphicsPipelineCacheKey key; | 132 | Vulkan::GraphicsPipelineCacheKey key; |
| 133 | u32 num_color_buffers; | ||
| 134 | }; | 134 | }; |
| 135 | 135 | ||
| 136 | std::condition_variable cv; | 136 | std::condition_variable cv; |
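With the render pass cache dropped from the work item, callers now hand the async queue the number of color attachments directly. A hedged sketch of the updated call; every variable here is assumed from the caller's scope:

async_shaders.QueueVulkanShader(&pipeline_cache, device, scheduler, descriptor_pool,
                                update_descriptor_queue, std::move(bindings), std::move(program),
                                key, num_color_buffers);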
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index ab14c1aa3..6576d1208 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -25,7 +25,7 @@ using Tegra::Shader::OpCode; | |||
| 25 | namespace { | 25 | namespace { |
| 26 | 26 | ||
| 27 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, | 27 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, |
| 28 | const std::list<Sampler>& used_samplers) { | 28 | const std::list<SamplerEntry>& used_samplers) { |
| 29 | if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { | 29 | if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { |
| 30 | return; | 30 | return; |
| 31 | } | 31 | } |
| @@ -43,9 +43,9 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, | |||
| 43 | } | 43 | } |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | 46 | std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, |
| 47 | VideoCore::GuestDriverProfile& gpu_driver, | 47 | VideoCore::GuestDriverProfile& gpu_driver, |
| 48 | const std::list<Sampler>& used_samplers) { | 48 | const std::list<SamplerEntry>& used_samplers) { |
| 49 | const u32 base_offset = sampler_to_deduce.offset; | 49 | const u32 base_offset = sampler_to_deduce.offset; |
| 50 | u32 max_offset{std::numeric_limits<u32>::max()}; | 50 | u32 max_offset{std::numeric_limits<u32>::max()}; |
| 51 | for (const auto& sampler : used_samplers) { | 51 | for (const auto& sampler : used_samplers) { |
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index b2e88fa20..fa83108cd 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp | |||
| @@ -22,13 +22,13 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { | |||
| 22 | const Instruction instr = {program_code[pc]}; | 22 | const Instruction instr = {program_code[pc]}; |
| 23 | const auto opcode = OpCode::Decode(instr); | 23 | const auto opcode = OpCode::Decode(instr); |
| 24 | 24 | ||
| 25 | PredCondition cond; | 25 | PredCondition cond{}; |
| 26 | bool bf; | 26 | bool bf = false; |
| 27 | bool ftz; | 27 | bool ftz = false; |
| 28 | bool neg_a; | 28 | bool neg_a = false; |
| 29 | bool abs_a; | 29 | bool abs_a = false; |
| 30 | bool neg_b; | 30 | bool neg_b = false; |
| 31 | bool abs_b; | 31 | bool abs_b = false; |
| 32 | switch (opcode->get().GetId()) { | 32 | switch (opcode->get().GetId()) { |
| 33 | case OpCode::Id::HSET2_C: | 33 | case OpCode::Id::HSET2_C: |
| 34 | case OpCode::Id::HSET2_IMM: | 34 | case OpCode::Id::HSET2_IMM: |
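Default-initializing the flags closes an undefined-behavior window: a switch path that assigned nothing previously left the locals indeterminate. A minimal illustration (not the emulator's code):

bool DecodeFlag(int opcode) {
    bool bf = false; // was: bool bf; -- indeterminate on the default path
    switch (opcode) {
    case 0:
        bf = true;
        break;
    default:
        break; // previously fell through without writing bf
    }
    return bf; // reading an indeterminate bool here would be UB
}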
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 532f66d27..5470e8cf4 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp | |||
| @@ -497,11 +497,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 497 | return pc; | 497 | return pc; |
| 498 | } | 498 | } |
| 499 | 499 | ||
| 500 | Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { | 500 | ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { |
| 501 | const auto offset = static_cast<u32>(image.index.Value()); | 501 | const auto offset = static_cast<u32>(image.index.Value()); |
| 502 | 502 | ||
| 503 | const auto it = std::find_if(std::begin(used_images), std::end(used_images), | 503 | const auto it = |
| 504 | [offset](const Image& entry) { return entry.offset == offset; }); | 504 | std::find_if(std::begin(used_images), std::end(used_images), |
| 505 | [offset](const ImageEntry& entry) { return entry.offset == offset; }); | ||
| 505 | if (it != std::end(used_images)) { | 506 | if (it != std::end(used_images)) { |
| 506 | ASSERT(!it->is_bindless && it->type == type); | 507 | ASSERT(!it->is_bindless && it->type == type); |
| 507 | return *it; | 508 | return *it; |
| @@ -511,7 +512,7 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t | |||
| 511 | return used_images.emplace_back(next_index, offset, type); | 512 | return used_images.emplace_back(next_index, offset, type); |
| 512 | } | 513 | } |
| 513 | 514 | ||
| 514 | Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { | 515 | ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { |
| 515 | const Node image_register = GetRegister(reg); | 516 | const Node image_register = GetRegister(reg); |
| 516 | const auto result = | 517 | const auto result = |
| 517 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); | 518 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); |
| @@ -520,7 +521,7 @@ Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::Im | |||
| 520 | const auto offset = std::get<2>(result); | 521 | const auto offset = std::get<2>(result); |
| 521 | 522 | ||
| 522 | const auto it = std::find_if(std::begin(used_images), std::end(used_images), | 523 | const auto it = std::find_if(std::begin(used_images), std::end(used_images), |
| 523 | [buffer, offset](const Image& entry) { | 524 | [buffer, offset](const ImageEntry& entry) { |
| 524 | return entry.buffer == buffer && entry.offset == offset; | 525 | return entry.buffer == buffer && entry.offset == offset; |
| 525 | }); | 526 | }); |
| 526 | if (it != std::end(used_images)) { | 527 | if (it != std::end(used_images)) { |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index fb18f631f..833fa2a39 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 141 | 141 | ||
| 142 | SamplerInfo info; | 142 | SamplerInfo info; |
| 143 | info.is_shadow = is_depth_compare; | 143 | info.is_shadow = is_depth_compare; |
| 144 | const std::optional<Sampler> sampler = GetSampler(instr.sampler, info); | 144 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); |
| 145 | 145 | ||
| 146 | Node4 values; | 146 | Node4 values; |
| 147 | for (u32 element = 0; element < values.size(); ++element) { | 147 | for (u32 element = 0; element < values.size(); ++element) { |
| @@ -173,9 +173,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 173 | SamplerInfo info; | 173 | SamplerInfo info; |
| 174 | info.type = texture_type; | 174 | info.type = texture_type; |
| 175 | info.is_array = is_array; | 175 | info.is_array = is_array; |
| 176 | const std::optional<Sampler> sampler = is_bindless | 176 | const std::optional<SamplerEntry> sampler = |
| 177 | ? GetBindlessSampler(base_reg, info, index_var) | 177 | is_bindless ? GetBindlessSampler(base_reg, info, index_var) |
| 178 | : GetSampler(instr.sampler, info); | 178 | : GetSampler(instr.sampler, info); |
| 179 | Node4 values; | 179 | Node4 values; |
| 180 | if (!sampler) { | 180 | if (!sampler) { |
| 181 | std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); | 181 | std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); |
| @@ -217,9 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 217 | [[fallthrough]]; | 217 | [[fallthrough]]; |
| 218 | case OpCode::Id::TXQ: { | 218 | case OpCode::Id::TXQ: { |
| 219 | Node index_var; | 219 | Node index_var; |
| 220 | const std::optional<Sampler> sampler = is_bindless | 220 | const std::optional<SamplerEntry> sampler = |
| 221 | ? GetBindlessSampler(instr.gpr8, {}, index_var) | 221 | is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) |
| 222 | : GetSampler(instr.sampler, {}); | 222 | : GetSampler(instr.sampler, {}); |
| 223 | 223 | ||
| 224 | if (!sampler) { | 224 | if (!sampler) { |
| 225 | u32 indexer = 0; | 225 | u32 indexer = 0; |
| @@ -272,7 +272,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 272 | info.type = texture_type; | 272 | info.type = texture_type; |
| 273 | info.is_array = is_array; | 273 | info.is_array = is_array; |
| 274 | Node index_var; | 274 | Node index_var; |
| 275 | const std::optional<Sampler> sampler = | 275 | const std::optional<SamplerEntry> sampler = |
| 276 | is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) | 276 | is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) |
| 277 | : GetSampler(instr.sampler, info); | 277 | : GetSampler(instr.sampler, info); |
| 278 | 278 | ||
| @@ -379,14 +379,15 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( | |||
| 379 | return info; | 379 | return info; |
| 380 | } | 380 | } |
| 381 | 381 | ||
| 382 | std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, | 382 | std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, |
| 383 | SamplerInfo sampler_info) { | 383 | SamplerInfo sampler_info) { |
| 384 | const u32 offset = static_cast<u32>(sampler.index.Value()); | 384 | const u32 offset = static_cast<u32>(sampler.index.Value()); |
| 385 | const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); | 385 | const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); |
| 386 | 386 | ||
| 387 | // If this sampler has already been used, return the existing mapping. | 387 | // If this sampler has already been used, return the existing mapping. |
| 388 | const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), | 388 | const auto it = |
| 389 | [offset](const Sampler& entry) { return entry.offset == offset; }); | 389 | std::find_if(used_samplers.begin(), used_samplers.end(), |
| 390 | [offset](const SamplerEntry& entry) { return entry.offset == offset; }); | ||
| 390 | if (it != used_samplers.end()) { | 391 | if (it != used_samplers.end()) { |
| 391 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && | 392 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && |
| 392 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); | 393 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); |
| @@ -399,8 +400,8 @@ std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, | |||
| 399 | *info.is_shadow, *info.is_buffer, false); | 400 | *info.is_shadow, *info.is_buffer, false); |
| 400 | } | 401 | } |
| 401 | 402 | ||
| 402 | std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, | 403 | std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, |
| 403 | Node& index_var) { | 404 | SamplerInfo info, Node& index_var) { |
| 404 | const Node sampler_register = GetRegister(reg); | 405 | const Node sampler_register = GetRegister(reg); |
| 405 | const auto [base_node, tracked_sampler_info] = | 406 | const auto [base_node, tracked_sampler_info] = |
| 406 | TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); | 407 | TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); |
| @@ -416,7 +417,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, | |||
| 416 | 417 | ||
| 417 | // If this sampler has already been used, return the existing mapping. | 418 | // If this sampler has already been used, return the existing mapping. |
| 418 | const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), | 419 | const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), |
| 419 | [buffer, offset](const Sampler& entry) { | 420 | [buffer, offset](const SamplerEntry& entry) { |
| 420 | return entry.buffer == buffer && entry.offset == offset; | 421 | return entry.buffer == buffer && entry.offset == offset; |
| 421 | }); | 422 | }); |
| 422 | if (it != used_samplers.end()) { | 423 | if (it != used_samplers.end()) { |
| @@ -436,11 +437,12 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, | |||
| 436 | info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); | 437 | info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); |
| 437 | 438 | ||
| 438 | // Try to use an already created sampler if it exists | 439 | // Try to use an already created sampler if it exists |
| 439 | const auto it = std::find_if( | 440 | const auto it = |
| 440 | used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) { | 441 | std::find_if(used_samplers.begin(), used_samplers.end(), |
| 441 | return offsets == std::pair{entry.offset, entry.secondary_offset} && | 442 | [indices, offsets](const SamplerEntry& entry) { |
| 442 | indices == std::pair{entry.buffer, entry.secondary_buffer}; | 443 | return offsets == std::pair{entry.offset, entry.secondary_offset} && |
| 443 | }); | 444 | indices == std::pair{entry.buffer, entry.secondary_buffer}; |
| 445 | }); | ||
| 444 | if (it != used_samplers.end()) { | 446 | if (it != used_samplers.end()) { |
| 445 | ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && | 447 | ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && |
| 446 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); | 448 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); |
| @@ -460,7 +462,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, | |||
| 460 | // If this sampler has already been used, return the existing mapping. | 462 | // If this sampler has already been used, return the existing mapping. |
| 461 | const auto it = std::find_if( | 463 | const auto it = std::find_if( |
| 462 | used_samplers.begin(), used_samplers.end(), | 464 | used_samplers.begin(), used_samplers.end(), |
| 463 | [base_offset](const Sampler& entry) { return entry.offset == base_offset; }); | 465 | [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); |
| 464 | if (it != used_samplers.end()) { | 466 | if (it != used_samplers.end()) { |
| 465 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && | 467 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && |
| 466 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && | 468 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && |
| @@ -565,9 +567,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 565 | info.is_buffer = false; | 567 | info.is_buffer = false; |
| 566 | 568 | ||
| 567 | Node index_var; | 569 | Node index_var; |
| 568 | const std::optional<Sampler> sampler = is_bindless | 570 | const std::optional<SamplerEntry> sampler = |
| 569 | ? GetBindlessSampler(*bindless_reg, info, index_var) | 571 | is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) |
| 570 | : GetSampler(instr.sampler, info); | 572 | : GetSampler(instr.sampler, info); |
| 571 | if (!sampler) { | 573 | if (!sampler) { |
| 572 | return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; | 574 | return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; |
| 573 | } | 575 | } |
| @@ -724,7 +726,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 724 | info.is_shadow = depth_compare; | 726 | info.is_shadow = depth_compare; |
| 725 | 727 | ||
| 726 | Node index_var; | 728 | Node index_var; |
| 727 | const std::optional<Sampler> sampler = | 729 | const std::optional<SamplerEntry> sampler = |
| 728 | is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) | 730 | is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) |
| 729 | : GetSampler(instr.sampler, info); | 731 | : GetSampler(instr.sampler, info); |
| 730 | Node4 values; | 732 | Node4 values; |
| @@ -783,7 +785,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | |||
| 783 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; | 785 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; |
| 784 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; | 786 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; |
| 785 | 787 | ||
| 786 | const std::optional<Sampler> sampler = GetSampler(instr.sampler, {}); | 788 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {}); |
| 787 | 789 | ||
| 788 | Node4 values; | 790 | Node4 values; |
| 789 | for (u32 element = 0; element < values.size(); ++element) { | 791 | for (u32 element = 0; element < values.size(); ++element) { |
| @@ -800,7 +802,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 800 | info.type = texture_type; | 802 | info.type = texture_type; |
| 801 | info.is_array = is_array; | 803 | info.is_array = is_array; |
| 802 | info.is_shadow = false; | 804 | info.is_shadow = false; |
| 803 | const std::optional<Sampler> sampler = GetSampler(instr.sampler, info); | 805 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); |
| 804 | 806 | ||
| 805 | const std::size_t type_coord_count = GetCoordCount(texture_type); | 807 | const std::size_t type_coord_count = GetCoordCount(texture_type); |
| 806 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | 808 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 8db9e1de7..b54d33763 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -282,25 +282,24 @@ struct SeparateSamplerNode; | |||
| 282 | using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; | 282 | using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; |
| 283 | using TrackSampler = std::shared_ptr<TrackSamplerData>; | 283 | using TrackSampler = std::shared_ptr<TrackSamplerData>; |
| 284 | 284 | ||
| 285 | struct Sampler { | 285 | struct SamplerEntry { |
| 286 | /// Bound samplers constructor | 286 | /// Bound samplers constructor |
| 287 | constexpr explicit Sampler(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, | 287 | explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, |
| 288 | bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) | 288 | bool is_shadow_, bool is_buffer_, bool is_indexed_) |
| 289 | : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, | 289 | : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, |
| 290 | is_buffer{is_buffer_}, is_indexed{is_indexed_} {} | 290 | is_buffer{is_buffer_}, is_indexed{is_indexed_} {} |
| 291 | 291 | ||
| 292 | /// Separate sampler constructor | 292 | /// Separate sampler constructor |
| 293 | constexpr explicit Sampler(u32 index_, std::pair<u32, u32> offsets_, | 293 | explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, |
| 294 | std::pair<u32, u32> buffers_, Tegra::Shader::TextureType type_, | 294 | Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, |
| 295 | bool is_array_, bool is_shadow_, bool is_buffer_) | 295 | bool is_buffer_) |
| 296 | : index{index_}, offset{offsets_.first}, secondary_offset{offsets_.second}, | 296 | : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, |
| 297 | buffer{buffers_.first}, secondary_buffer{buffers_.second}, type{type_}, | 297 | buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, |
| 298 | is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} | 298 | is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} |
| 299 | 299 | ||
| 300 | /// Bindless samplers constructor | 300 | /// Bindless samplers constructor |
| 301 | constexpr explicit Sampler(u32 index_, u32 offset_, u32 buffer_, | 301 | explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, |
| 302 | Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, | 302 | bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) |
| 303 | bool is_buffer_, bool is_indexed_) | ||
| 304 | : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, | 303 | : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, |
| 305 | is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { | 304 | is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { |
| 306 | } | 305 | } |
| @@ -340,14 +339,14 @@ struct BindlessSamplerNode { | |||
| 340 | u32 offset; | 339 | u32 offset; |
| 341 | }; | 340 | }; |
| 342 | 341 | ||
| 343 | struct Image { | 342 | struct ImageEntry { |
| 344 | public: | 343 | public: |
| 345 | /// Bound images constructor | 344 | /// Bound images constructor |
| 346 | constexpr explicit Image(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) | 345 | explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) |
| 347 | : index{index_}, offset{offset_}, type{type_} {} | 346 | : index{index_}, offset{offset_}, type{type_} {} |
| 348 | 347 | ||
| 349 | /// Bindless samplers constructor | 348 | /// Bindless samplers constructor |
| 350 | constexpr explicit Image(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) | 349 | explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) |
| 351 | : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} | 350 | : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} |
| 352 | 351 | ||
| 353 | void MarkWrite() { | 352 | void MarkWrite() { |
| @@ -391,7 +390,7 @@ struct MetaArithmetic { | |||
| 391 | 390 | ||
| 392 | /// Parameters describing a texture sampler | 391 | /// Parameters describing a texture sampler |
| 393 | struct MetaTexture { | 392 | struct MetaTexture { |
| 394 | Sampler sampler; | 393 | SamplerEntry sampler; |
| 395 | Node array; | 394 | Node array; |
| 396 | Node depth_compare; | 395 | Node depth_compare; |
| 397 | std::vector<Node> aoffi; | 396 | std::vector<Node> aoffi; |
| @@ -405,7 +404,7 @@ struct MetaTexture { | |||
| 405 | }; | 404 | }; |
| 406 | 405 | ||
| 407 | struct MetaImage { | 406 | struct MetaImage { |
| 408 | const Image& image; | 407 | const ImageEntry& image; |
| 409 | std::vector<Node> values; | 408 | std::vector<Node> values; |
| 410 | u32 element{}; | 409 | u32 element{}; |
| 411 | }; | 410 | }; |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 6aae14e34..0c6ab0f07 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -94,11 +94,11 @@ public: | |||
| 94 | return used_cbufs; | 94 | return used_cbufs; |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | const std::list<Sampler>& GetSamplers() const { | 97 | const std::list<SamplerEntry>& GetSamplers() const { |
| 98 | return used_samplers; | 98 | return used_samplers; |
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | const std::list<Image>& GetImages() const { | 101 | const std::list<ImageEntry>& GetImages() const { |
| 102 | return used_images; | 102 | return used_images; |
| 103 | } | 103 | } |
| 104 | 104 | ||
| @@ -334,17 +334,17 @@ private: | |||
| 334 | std::optional<Tegra::Engines::SamplerDescriptor> sampler); | 334 | std::optional<Tegra::Engines::SamplerDescriptor> sampler); |
| 335 | 335 | ||
| 336 | /// Accesses a texture sampler. | 336 | /// Accesses a texture sampler. |
| 337 | std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); | 337 | std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); |
| 338 | 338 | ||
| 339 | /// Accesses a texture sampler for a bindless texture. | 339 | /// Accesses a texture sampler for a bindless texture. |
| 340 | std::optional<Sampler> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, | 340 | std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, |
| 341 | Node& index_var); | 341 | Node& index_var); |
| 342 | 342 | ||
| 343 | /// Accesses an image. | 343 | /// Accesses an image. |
| 344 | Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); | 344 | ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); |
| 345 | 345 | ||
| 346 | /// Access a bindless image sampler. | 346 | /// Access a bindless image sampler. |
| 347 | Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); | 347 | ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); |
| 348 | 348 | ||
| 349 | /// Extracts a sequence of bits from a node | 349 | /// Extracts a sequence of bits from a node |
| 350 | Node BitfieldExtract(Node value, u32 offset, u32 bits); | 350 | Node BitfieldExtract(Node value, u32 offset, u32 bits); |
| @@ -454,8 +454,8 @@ private: | |||
| 454 | std::set<Tegra::Shader::Attribute::Index> used_input_attributes; | 454 | std::set<Tegra::Shader::Attribute::Index> used_input_attributes; |
| 455 | std::set<Tegra::Shader::Attribute::Index> used_output_attributes; | 455 | std::set<Tegra::Shader::Attribute::Index> used_output_attributes; |
| 456 | std::map<u32, ConstBuffer> used_cbufs; | 456 | std::map<u32, ConstBuffer> used_cbufs; |
| 457 | std::list<Sampler> used_samplers; | 457 | std::list<SamplerEntry> used_samplers; |
| 458 | std::list<Image> used_images; | 458 | std::list<ImageEntry> used_images; |
| 459 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; | 459 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; |
| 460 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; | 460 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; |
| 461 | bool uses_layer{}; | 461 | bool uses_layer{}; |
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 937e29d1e..6308aef94 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -280,7 +280,7 @@ bool IsPixelFormatSRGB(PixelFormat format) { | |||
| 280 | } | 280 | } |
| 281 | 281 | ||
| 282 | std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { | 282 | std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { |
| 283 | return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; | 283 | return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; |
| 284 | } | 284 | } |
| 285 | 285 | ||
| 286 | } // namespace VideoCore::Surface | 286 | } // namespace VideoCore::Surface |
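With the renamed helpers, the ASTC block footprint falls straight out of the per-format tables. For example, assuming the table entries match the dimensions in the format names:

const auto [block_width, block_height] = GetASTCBlockSize(PixelFormat::ASTC_2D_8X5_UNORM);
// block_width == 8, block_height == 5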
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index cfd12fa61..c40ab89d0 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -120,7 +120,7 @@ enum class PixelFormat { | |||
| 120 | Max = MaxDepthStencilFormat, | 120 | Max = MaxDepthStencilFormat, |
| 121 | Invalid = 255, | 121 | Invalid = 255, |
| 122 | }; | 122 | }; |
| 123 | static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max); | 123 | constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max); |
| 124 | 124 | ||
| 125 | enum class SurfaceType { | 125 | enum class SurfaceType { |
| 126 | ColorTexture = 0, | 126 | ColorTexture = 0, |
| @@ -140,117 +140,7 @@ enum class SurfaceTarget { | |||
| 140 | TextureCubeArray, | 140 | TextureCubeArray, |
| 141 | }; | 141 | }; |
| 142 | 142 | ||
| 143 | constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ | 143 | constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{ |
| 144 | 0, // A8B8G8R8_UNORM | ||
| 145 | 0, // A8B8G8R8_SNORM | ||
| 146 | 0, // A8B8G8R8_SINT | ||
| 147 | 0, // A8B8G8R8_UINT | ||
| 148 | 0, // R5G6B5_UNORM | ||
| 149 | 0, // B5G6R5_UNORM | ||
| 150 | 0, // A1R5G5B5_UNORM | ||
| 151 | 0, // A2B10G10R10_UNORM | ||
| 152 | 0, // A2B10G10R10_UINT | ||
| 153 | 0, // A1B5G5R5_UNORM | ||
| 154 | 0, // R8_UNORM | ||
| 155 | 0, // R8_SNORM | ||
| 156 | 0, // R8_SINT | ||
| 157 | 0, // R8_UINT | ||
| 158 | 0, // R16G16B16A16_FLOAT | ||
| 159 | 0, // R16G16B16A16_UNORM | ||
| 160 | 0, // R16G16B16A16_SNORM | ||
| 161 | 0, // R16G16B16A16_SINT | ||
| 162 | 0, // R16G16B16A16_UINT | ||
| 163 | 0, // B10G11R11_FLOAT | ||
| 164 | 0, // R32G32B32A32_UINT | ||
| 165 | 2, // BC1_RGBA_UNORM | ||
| 166 | 2, // BC2_UNORM | ||
| 167 | 2, // BC3_UNORM | ||
| 168 | 2, // BC4_UNORM | ||
| 169 | 2, // BC4_SNORM | ||
| 170 | 2, // BC5_UNORM | ||
| 171 | 2, // BC5_SNORM | ||
| 172 | 2, // BC7_UNORM | ||
| 173 | 2, // BC6H_UFLOAT | ||
| 174 | 2, // BC6H_SFLOAT | ||
| 175 | 2, // ASTC_2D_4X4_UNORM | ||
| 176 | 0, // B8G8R8A8_UNORM | ||
| 177 | 0, // R32G32B32A32_FLOAT | ||
| 178 | 0, // R32G32B32A32_SINT | ||
| 179 | 0, // R32G32_FLOAT | ||
| 180 | 0, // R32G32_SINT | ||
| 181 | 0, // R32_FLOAT | ||
| 182 | 0, // R16_FLOAT | ||
| 183 | 0, // R16_UNORM | ||
| 184 | 0, // R16_SNORM | ||
| 185 | 0, // R16_UINT | ||
| 186 | 0, // R16_SINT | ||
| 187 | 0, // R16G16_UNORM | ||
| 188 | 0, // R16G16_FLOAT | ||
| 189 | 0, // R16G16_UINT | ||
| 190 | 0, // R16G16_SINT | ||
| 191 | 0, // R16G16_SNORM | ||
| 192 | 0, // R32G32B32_FLOAT | ||
| 193 | 0, // A8B8G8R8_SRGB | ||
| 194 | 0, // R8G8_UNORM | ||
| 195 | 0, // R8G8_SNORM | ||
| 196 | 0, // R8G8_SINT | ||
| 197 | 0, // R8G8_UINT | ||
| 198 | 0, // R32G32_UINT | ||
| 199 | 0, // R16G16B16X16_FLOAT | ||
| 200 | 0, // R32_UINT | ||
| 201 | 0, // R32_SINT | ||
| 202 | 2, // ASTC_2D_8X8_UNORM | ||
| 203 | 2, // ASTC_2D_8X5_UNORM | ||
| 204 | 2, // ASTC_2D_5X4_UNORM | ||
| 205 | 0, // B8G8R8A8_SRGB | ||
| 206 | 2, // BC1_RGBA_SRGB | ||
| 207 | 2, // BC2_SRGB | ||
| 208 | 2, // BC3_SRGB | ||
| 209 | 2, // BC7_SRGB | ||
| 210 | 0, // A4B4G4R4_UNORM | ||
| 211 | 2, // ASTC_2D_4X4_SRGB | ||
| 212 | 2, // ASTC_2D_8X8_SRGB | ||
| 213 | 2, // ASTC_2D_8X5_SRGB | ||
| 214 | 2, // ASTC_2D_5X4_SRGB | ||
| 215 | 2, // ASTC_2D_5X5_UNORM | ||
| 216 | 2, // ASTC_2D_5X5_SRGB | ||
| 217 | 2, // ASTC_2D_10X8_UNORM | ||
| 218 | 2, // ASTC_2D_10X8_SRGB | ||
| 219 | 2, // ASTC_2D_6X6_UNORM | ||
| 220 | 2, // ASTC_2D_6X6_SRGB | ||
| 221 | 2, // ASTC_2D_10X10_UNORM | ||
| 222 | 2, // ASTC_2D_10X10_SRGB | ||
| 223 | 2, // ASTC_2D_12X12_UNORM | ||
| 224 | 2, // ASTC_2D_12X12_SRGB | ||
| 225 | 2, // ASTC_2D_8X6_UNORM | ||
| 226 | 2, // ASTC_2D_8X6_SRGB | ||
| 227 | 2, // ASTC_2D_6X5_UNORM | ||
| 228 | 2, // ASTC_2D_6X5_SRGB | ||
| 229 | 0, // E5B9G9R9_FLOAT | ||
| 230 | 0, // D32_FLOAT | ||
| 231 | 0, // D16_UNORM | ||
| 232 | 0, // D24_UNORM_S8_UINT | ||
| 233 | 0, // S8_UINT_D24_UNORM | ||
| 234 | 0, // D32_FLOAT_S8_UINT | ||
| 235 | }}; | ||
| 236 | |||
| 237 | /** | ||
| 238 | * Gets the compression factor for the specified PixelFormat. This applies to just the | ||
| 239 | * "compressed width" and "compressed height", not the overall compression factor of a | ||
| 240 | * compressed image. This is used for maintaining proper surface sizes for compressed | ||
| 241 | * texture formats. | ||
| 242 | */ | ||
| 243 | inline constexpr u32 GetCompressionFactorShift(PixelFormat format) { | ||
| 244 | DEBUG_ASSERT(format != PixelFormat::Invalid); | ||
| 245 | DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_factor_shift_table.size()); | ||
| 246 | return compression_factor_shift_table[static_cast<std::size_t>(format)]; | ||
| 247 | } | ||
| 248 | |||
| 249 | inline constexpr u32 GetCompressionFactor(PixelFormat format) { | ||
| 250 | return 1U << GetCompressionFactorShift(format); | ||
| 251 | } | ||
| 252 | |||
| 253 | constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ | ||
| 254 | 1, // A8B8G8R8_UNORM | 144 | 1, // A8B8G8R8_UNORM |
| 255 | 1, // A8B8G8R8_SNORM | 145 | 1, // A8B8G8R8_SNORM |
| 256 | 1, // A8B8G8R8_SINT | 146 | 1, // A8B8G8R8_SINT |
| @@ -344,15 +234,12 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ | |||
| 344 | 1, // D32_FLOAT_S8_UINT | 234 | 1, // D32_FLOAT_S8_UINT |
| 345 | }}; | 235 | }}; |
| 346 | 236 | ||
| 347 | static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { | 237 | constexpr u32 DefaultBlockWidth(PixelFormat format) { |
| 348 | if (format == PixelFormat::Invalid) | 238 | ASSERT(static_cast<std::size_t>(format) < BLOCK_WIDTH_TABLE.size()); |
| 349 | return 0; | 239 | return BLOCK_WIDTH_TABLE[static_cast<std::size_t>(format)]; |
| 350 | |||
| 351 | ASSERT(static_cast<std::size_t>(format) < block_width_table.size()); | ||
| 352 | return block_width_table[static_cast<std::size_t>(format)]; | ||
| 353 | } | 240 | } |
| 354 | 241 | ||
| 355 | constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ | 242 | constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{ |
| 356 | 1, // A8B8G8R8_UNORM | 243 | 1, // A8B8G8R8_UNORM |
| 357 | 1, // A8B8G8R8_SNORM | 244 | 1, // A8B8G8R8_SNORM |
| 358 | 1, // A8B8G8R8_SINT | 245 | 1, // A8B8G8R8_SINT |
| @@ -446,15 +333,12 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ | |||
| 446 | 1, // D32_FLOAT_S8_UINT | 333 | 1, // D32_FLOAT_S8_UINT |
| 447 | }}; | 334 | }}; |
| 448 | 335 | ||
| 449 | static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { | 336 | constexpr u32 DefaultBlockHeight(PixelFormat format) { |
| 450 | if (format == PixelFormat::Invalid) | 337 | ASSERT(static_cast<std::size_t>(format) < BLOCK_HEIGHT_TABLE.size()); |
| 451 | return 0; | 338 | return BLOCK_HEIGHT_TABLE[static_cast<std::size_t>(format)]; |
| 452 | |||
| 453 | ASSERT(static_cast<std::size_t>(format) < block_height_table.size()); | ||
| 454 | return block_height_table[static_cast<std::size_t>(format)]; | ||
| 455 | } | 339 | } |
| 456 | 340 | ||
| 457 | constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ | 341 | constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{ |
| 458 | 32, // A8B8G8R8_UNORM | 342 | 32, // A8B8G8R8_UNORM |
| 459 | 32, // A8B8G8R8_SNORM | 343 | 32, // A8B8G8R8_SNORM |
| 460 | 32, // A8B8G8R8_SINT | 344 | 32, // A8B8G8R8_SINT |
| @@ -548,20 +432,14 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ | |||
| 548 | 64, // D32_FLOAT_S8_UINT | 432 | 64, // D32_FLOAT_S8_UINT |
| 549 | }}; | 433 | }}; |
| 550 | 434 | ||
| 551 | static constexpr u32 GetFormatBpp(PixelFormat format) { | 435 | constexpr u32 BitsPerBlock(PixelFormat format) { |
| 552 | if (format == PixelFormat::Invalid) | 436 | ASSERT(static_cast<std::size_t>(format) < BITS_PER_BLOCK_TABLE.size()); |
| 553 | return 0; | 437 | return BITS_PER_BLOCK_TABLE[static_cast<std::size_t>(format)]; |
| 554 | |||
| 555 | ASSERT(static_cast<std::size_t>(format) < bpp_table.size()); | ||
| 556 | return bpp_table[static_cast<std::size_t>(format)]; | ||
| 557 | } | 438 | } |
| 558 | 439 | ||
| 559 | /// Returns the size in bytes of the specified pixel format | 440 | /// Returns the size in bytes of the specified pixel format |
| 560 | static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { | 441 | constexpr u32 BytesPerBlock(PixelFormat pixel_format) { |
| 561 | if (pixel_format == PixelFormat::Invalid) { | 442 | return BitsPerBlock(pixel_format) / CHAR_BIT; |
| 562 | return 0; | ||
| 563 | } | ||
| 564 | return GetFormatBpp(pixel_format) / CHAR_BIT; | ||
| 565 | } | 443 | } |
| 566 | 444 | ||
| 567 | SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); | 445 | SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); |
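Editorial note: the renamed helpers make the block-based model explicit. An uncompressed format behaves as a 1x1 block, while BC/ASTC formats cover a tile, so BytesPerBlock replaces the old per-pixel naming and the Invalid-format early-outs become asserts. A minimal sketch of how the lookups compose, assuming the standard BC1 parameters (4x4 blocks, 64 bits per block); BytesPerRowOfBlocks is a hypothetical helper, not part of the patch:

    // Sketch: bytes needed for one row of blocks of a given surface width.
    constexpr u32 BytesPerRowOfBlocks(PixelFormat format, u32 width) {
        const u32 block_width = DefaultBlockWidth(format); // 4 for BC1_RGBA_UNORM
        const u32 blocks_x = (width + block_width - 1) / block_width;
        return blocks_x * BytesPerBlock(format);           // 8 bytes per BC1 block
    }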
diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp new file mode 100644 index 000000000..a4fc1184b --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.cpp | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <bit> | ||
| 7 | |||
| 8 | #include "common/alignment.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "common/div_ceil.h" | ||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/texture_cache/accelerated_swizzle.h" | ||
| 13 | #include "video_core/texture_cache/util.h" | ||
| 14 | #include "video_core/textures/decoders.h" | ||
| 15 | |||
| 16 | namespace VideoCommon::Accelerated { | ||
| 17 | |||
| 18 | using Tegra::Texture::GOB_SIZE_SHIFT; | ||
| 19 | using Tegra::Texture::GOB_SIZE_X; | ||
| 20 | using Tegra::Texture::GOB_SIZE_X_SHIFT; | ||
| 21 | using Tegra::Texture::GOB_SIZE_Y_SHIFT; | ||
| 22 | using VideoCore::Surface::BytesPerBlock; | ||
| 23 | |||
| 24 | BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle, | ||
| 25 | const ImageInfo& info) { | ||
| 26 | const Extent3D block = swizzle.block; | ||
| 27 | const Extent3D num_tiles = swizzle.num_tiles; | ||
| 28 | const u32 bytes_per_block = BytesPerBlock(info.format); | ||
| 29 | const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); | ||
| 30 | const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; | ||
| 31 | const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); | ||
| 32 | return BlockLinearSwizzle2DParams{ | ||
| 33 | .origin{0, 0, 0}, | ||
| 34 | .destination{0, 0, 0}, | ||
| 35 | .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)), | ||
| 36 | .layer_stride = info.layer_stride, | ||
| 37 | .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth), | ||
| 38 | .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, | ||
| 39 | .block_height = block.height, | ||
| 40 | .block_height_mask = (1U << block.height) - 1, | ||
| 41 | }; | ||
| 42 | } | ||
| 43 | |||
| 44 | BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle, | ||
| 45 | const ImageInfo& info) { | ||
| 46 | const Extent3D block = swizzle.block; | ||
| 47 | const Extent3D num_tiles = swizzle.num_tiles; | ||
| 48 | const u32 bytes_per_block = BytesPerBlock(info.format); | ||
| 49 | const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); | ||
| 50 | const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; | ||
| 51 | |||
| 52 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT; | ||
| 53 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); | ||
| 54 | const u32 slice_size = | ||
| 55 | Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size; | ||
| 56 | return BlockLinearSwizzle3DParams{ | ||
| 57 | .origin{0, 0, 0}, | ||
| 58 | .destination{0, 0, 0}, | ||
| 59 | .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)), | ||
| 60 | .slice_size = slice_size, | ||
| 61 | .block_size = block_size, | ||
| 62 | .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, | ||
| 63 | .block_height = block.height, | ||
| 64 | .block_height_mask = (1U << block.height) - 1, | ||
| 65 | .block_depth = block.depth, | ||
| 66 | .block_depth_mask = (1U << block.depth) - 1, | ||
| 67 | }; | ||
| 68 | } | ||
| 69 | |||
| 70 | } // namespace VideoCommon::Accelerated \ No newline at end of file | ||
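Editorial note: the 2D parameter computation is easier to follow with concrete numbers. A worked example, assuming the usual Tegra GOB geometry (GOB_SIZE_X = 64 bytes, GOB_SIZE_X_SHIFT = 6, GOB_SIZE_SHIFT = 9, i.e. a 512-byte GOB) and a stride alignment that leaves the width unchanged; the values are stated here as assumptions, not taken from the headers:

    // Worked example for MakeBlockLinearSwizzle2DParams:
    //   num_tiles.width = 256, bytes_per_block = 4, block.height = 4, block.depth = 0
    //   stride     = 256 * 4                       = 1024 bytes
    //   gobs_in_x  = DivCeilLog2(1024, 6)          = 16 GOBs per row
    //   block_size = 16 << (9 + 4 + 0)             = 131072 bytes per block row
    //   x_shift    = 9 + 4 + 0                     = 13
    // block_height_mask = (1 << 4) - 1 = 0xf selects the GOB row inside a block.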
diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h new file mode 100644 index 000000000..6ec5c78c4 --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.h | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/texture_cache/image_info.h" | ||
| 11 | #include "video_core/texture_cache/types.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Accelerated { | ||
| 14 | |||
| 15 | struct BlockLinearSwizzle2DParams { | ||
| 16 | std::array<u32, 3> origin; | ||
| 17 | std::array<s32, 3> destination; | ||
| 18 | u32 bytes_per_block_log2; | ||
| 19 | u32 layer_stride; | ||
| 20 | u32 block_size; | ||
| 21 | u32 x_shift; | ||
| 22 | u32 block_height; | ||
| 23 | u32 block_height_mask; | ||
| 24 | }; | ||
| 25 | |||
| 26 | struct BlockLinearSwizzle3DParams { | ||
| 27 | std::array<u32, 3> origin; | ||
| 28 | std::array<s32, 3> destination; | ||
| 29 | u32 bytes_per_block_log2; | ||
| 30 | u32 slice_size; | ||
| 31 | u32 block_size; | ||
| 32 | u32 x_shift; | ||
| 33 | u32 block_height; | ||
| 34 | u32 block_height_mask; | ||
| 35 | u32 block_depth; | ||
| 36 | u32 block_depth_mask; | ||
| 37 | }; | ||
| 38 | |||
| 39 | [[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams( | ||
| 40 | const SwizzleParameters& swizzle, const ImageInfo& info); | ||
| 41 | |||
| 42 | [[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams( | ||
| 43 | const SwizzleParameters& swizzle, const ImageInfo& info); | ||
| 44 | |||
| 45 | } // namespace VideoCommon::Accelerated | ||
diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h deleted file mode 100644 index 5b475fe06..000000000 --- a/src/video_core/texture_cache/copy_params.h +++ /dev/null | |||
| @@ -1,36 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
| 11 | struct CopyParams { | ||
| 12 | constexpr CopyParams(u32 source_x_, u32 source_y_, u32 source_z_, u32 dest_x_, u32 dest_y_, | ||
| 13 | u32 dest_z_, u32 source_level_, u32 dest_level_, u32 width_, u32 height_, | ||
| 14 | u32 depth_) | ||
| 15 | : source_x{source_x_}, source_y{source_y_}, source_z{source_z_}, dest_x{dest_x_}, | ||
| 16 | dest_y{dest_y_}, dest_z{dest_z_}, source_level{source_level_}, | ||
| 17 | dest_level{dest_level_}, width{width_}, height{height_}, depth{depth_} {} | ||
| 18 | |||
| 19 | constexpr CopyParams(u32 width_, u32 height_, u32 depth_, u32 level_) | ||
| 20 | : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level_}, | ||
| 21 | dest_level{level_}, width{width_}, height{height_}, depth{depth_} {} | ||
| 22 | |||
| 23 | u32 source_x; | ||
| 24 | u32 source_y; | ||
| 25 | u32 source_z; | ||
| 26 | u32 dest_x; | ||
| 27 | u32 dest_y; | ||
| 28 | u32 dest_z; | ||
| 29 | u32 source_level; | ||
| 30 | u32 dest_level; | ||
| 31 | u32 width; | ||
| 32 | u32 height; | ||
| 33 | u32 depth; | ||
| 34 | }; | ||
| 35 | |||
| 36 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp new file mode 100644 index 000000000..017327975 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc4.cpp | |||
| @@ -0,0 +1,97 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <span> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/texture_cache/decode_bc4.h" | ||
| 12 | #include "video_core/texture_cache/types.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt | ||
| 17 | [[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) { | ||
| 18 | const u32 code_offset = 16 + 3 * (4 * y + x); | ||
| 19 | const u32 code = (bits >> code_offset) & 7; | ||
| 20 | const u32 red0 = (bits >> 0) & 0xff; | ||
| 21 | const u32 red1 = (bits >> 8) & 0xff; | ||
| 22 | if (red0 > red1) { | ||
| 23 | switch (code) { | ||
| 24 | case 0: | ||
| 25 | return red0; | ||
| 26 | case 1: | ||
| 27 | return red1; | ||
| 28 | case 2: | ||
| 29 | return (6 * red0 + 1 * red1) / 7; | ||
| 30 | case 3: | ||
| 31 | return (5 * red0 + 2 * red1) / 7; | ||
| 32 | case 4: | ||
| 33 | return (4 * red0 + 3 * red1) / 7; | ||
| 34 | case 5: | ||
| 35 | return (3 * red0 + 4 * red1) / 7; | ||
| 36 | case 6: | ||
| 37 | return (2 * red0 + 5 * red1) / 7; | ||
| 38 | case 7: | ||
| 39 | return (1 * red0 + 6 * red1) / 7; | ||
| 40 | } | ||
| 41 | } else { | ||
| 42 | switch (code) { | ||
| 43 | case 0: | ||
| 44 | return red0; | ||
| 45 | case 1: | ||
| 46 | return red1; | ||
| 47 | case 2: | ||
| 48 | return (4 * red0 + 1 * red1) / 5; | ||
| 49 | case 3: | ||
| 50 | return (3 * red0 + 2 * red1) / 5; | ||
| 51 | case 4: | ||
| 52 | return (2 * red0 + 3 * red1) / 5; | ||
| 53 | case 5: | ||
| 54 | return (1 * red0 + 4 * red1) / 5; | ||
| 55 | case 6: | ||
| 56 | return 0; | ||
| 57 | case 7: | ||
| 58 | return 0xff; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | return 0; | ||
| 62 | } | ||
| 63 | |||
| 64 | void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) { | ||
| 65 | UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width); | ||
| 66 | UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height); | ||
| 67 | static constexpr u32 BLOCK_SIZE = 4; | ||
| 68 | size_t input_offset = 0; | ||
| 69 | for (u32 slice = 0; slice < extent.depth; ++slice) { | ||
| 70 | for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) { | ||
| 71 | for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) { | ||
| 72 | u64 bits; | ||
| 73 | std::memcpy(&bits, &input[input_offset], sizeof(bits)); | ||
| 74 | input_offset += sizeof(bits); | ||
| 75 | |||
| 76 | for (u32 y = 0; y < BLOCK_SIZE; ++y) { | ||
| 77 | for (u32 x = 0; x < BLOCK_SIZE; ++x) { | ||
| 78 | const u32 linear_z = slice; | ||
| 79 | const u32 linear_y = block_y * BLOCK_SIZE + y; | ||
| 80 | const u32 linear_x = block_x * BLOCK_SIZE + x; | ||
| 81 | const u32 offset_z = linear_z * extent.width * extent.height; | ||
| 82 | const u32 offset_y = linear_y * extent.width; | ||
| 83 | const u32 offset_x = linear_x; | ||
| 84 | const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL; | ||
| 85 | const u32 color = DecompressBlock(bits, x, y); | ||
| 86 | output[output_offset + 0] = static_cast<u8>(color); | ||
| 87 | output[output_offset + 1] = 0; | ||
| 88 | output[output_offset + 2] = 0; | ||
| 89 | output[output_offset + 3] = 0xff; | ||
| 90 | } | ||
| 91 | } | ||
| 92 | } | ||
| 93 | } | ||
| 94 | } | ||
| 95 | } | ||
| 96 | |||
| 97 | } // namespace VideoCommon | ||
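Editorial note: DecompressBC4 consumes 8 bytes (one u64) per 4x4 block and writes RGBA8 with the decoded value in red, zeroed green/blue, and opaque alpha, as the inner loop above shows. A hypothetical call site illustrating the expected buffer sizes:

    // Decode an 8x4 BC4 image (two 4x4 blocks).
    const VideoCommon::Extent3D extent{.width = 8, .height = 4, .depth = 1};
    std::vector<u8> compressed(2 * sizeof(u64)); // 8 bytes per block
    std::vector<u8> decoded(8 * 4 * 4);          // width * height * 4 (RGBA8)
    VideoCommon::DecompressBC4(compressed, extent, decoded);
    // decoded[i * 4 + 0] holds the red channel; alpha is always 0xff.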
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h new file mode 100644 index 000000000..63fb23508 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc4.h | |||
| @@ -0,0 +1,16 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <span> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/texture_cache/types.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output); | ||
| 15 | |||
| 16 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h new file mode 100644 index 000000000..3a03b786f --- /dev/null +++ b/src/video_core/texture_cache/descriptor_table.h | |||
| @@ -0,0 +1,82 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/div_ceil.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/memory_manager.h" | ||
| 14 | #include "video_core/rasterizer_interface.h" | ||
| 15 | |||
| 16 | namespace VideoCommon { | ||
| 17 | |||
| 18 | template <typename Descriptor> | ||
| 19 | class DescriptorTable { | ||
| 20 | public: | ||
| 21 | explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {} | ||
| 22 | |||
| 23 | [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) { | ||
| 24 | if (current_gpu_addr == gpu_addr && current_limit == limit) [[likely]] { | ||
| 25 | return false; | ||
| 26 | } | ||
| 27 | Refresh(gpu_addr, limit); | ||
| 28 | return true; | ||
| 29 | } | ||
| 30 | |||
| 31 | void Invalidate() noexcept { | ||
| 32 | std::ranges::fill(read_descriptors, 0); | ||
| 33 | } | ||
| 34 | |||
| 35 | [[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) { | ||
| 36 | DEBUG_ASSERT(index <= current_limit); | ||
| 37 | const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor); | ||
| 38 | std::pair<Descriptor, bool> result; | ||
| 39 | gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor)); | ||
| 40 | if (IsDescriptorRead(index)) { | ||
| 41 | result.second = result.first != descriptors[index]; | ||
| 42 | } else { | ||
| 43 | MarkDescriptorAsRead(index); | ||
| 44 | result.second = true; | ||
| 45 | } | ||
| 46 | if (result.second) { | ||
| 47 | descriptors[index] = result.first; | ||
| 48 | } | ||
| 49 | return result; | ||
| 50 | } | ||
| 51 | |||
| 52 | [[nodiscard]] u32 Limit() const noexcept { | ||
| 53 | return current_limit; | ||
| 54 | } | ||
| 55 | |||
| 56 | private: | ||
| 57 | void Refresh(GPUVAddr gpu_addr, u32 limit) { | ||
| 58 | current_gpu_addr = gpu_addr; | ||
| 59 | current_limit = limit; | ||
| 60 | |||
| 61 | const size_t num_descriptors = static_cast<size_t>(limit) + 1; | ||
| 62 | read_descriptors.clear(); | ||
| 63 | read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0); | ||
| 64 | descriptors.resize(num_descriptors); | ||
| 65 | } | ||
| 66 | |||
| 67 | void MarkDescriptorAsRead(u32 index) noexcept { | ||
| 68 | read_descriptors[index / 64] |= 1ULL << (index % 64); | ||
| 69 | } | ||
| 70 | |||
| 71 | [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept { | ||
| 72 | return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0; | ||
| 73 | } | ||
| 74 | |||
| 75 | Tegra::MemoryManager& gpu_memory; | ||
| 76 | GPUVAddr current_gpu_addr{}; | ||
| 77 | u32 current_limit{}; | ||
| 78 | std::vector<u64> read_descriptors; | ||
| 79 | std::vector<Descriptor> descriptors; | ||
| 80 | }; | ||
| 81 | |||
| 82 | } // namespace VideoCommon | ||
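Editorial note: the table tracks which descriptor slots have been read via a bitset, so Read can report whether a slot is new or changed without rereading the whole table. A sketch of the intended flow; using Tegra::Texture::TICEntry as the descriptor type and the address/limit names are assumptions for illustration:

    DescriptorTable<Tegra::Texture::TICEntry> table{gpu_memory};
    if (table.Synchronize(tic_base_address, tic_limit)) {
        // Table moved or resized; state derived from old descriptors is stale.
    }
    const auto [descriptor, is_new] = table.Read(index);
    if (is_new) {
        // First read of this slot, or its contents changed since the last read.
    }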
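One design note: Invalidate only clears the read bitset, not the cached descriptors, so the next Read of any slot reports it as new even when its bytes are unchanged; this appears to be the intended way to force resynchronization after GPU memory is written externally.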
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 7938d71eb..ddfb726fe 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp | |||
| @@ -2,7 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | ||
| 6 | #include "common/common_types.h" | 5 | #include "common/common_types.h" |
| 7 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 8 | #include "video_core/texture_cache/format_lookup_table.h" | 7 | #include "video_core/texture_cache/format_lookup_table.h" |
| @@ -20,198 +19,207 @@ constexpr auto UNORM = ComponentType::UNORM; | |||
| 20 | constexpr auto SINT = ComponentType::SINT; | 19 | constexpr auto SINT = ComponentType::SINT; |
| 21 | constexpr auto UINT = ComponentType::UINT; | 20 | constexpr auto UINT = ComponentType::UINT; |
| 22 | constexpr auto FLOAT = ComponentType::FLOAT; | 21 | constexpr auto FLOAT = ComponentType::FLOAT; |
| 23 | constexpr bool C = false; // Normal color | 22 | constexpr bool LINEAR = false; |
| 24 | constexpr bool S = true; // Srgb | 23 | constexpr bool SRGB = true; |
| 25 | 24 | ||
| 26 | struct Table { | 25 | constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component, |
| 27 | constexpr Table(TextureFormat texture_format_, bool is_srgb_, ComponentType red_component_, | 26 | ComponentType blue_component, ComponentType alpha_component, bool is_srgb) { |
| 28 | ComponentType green_component_, ComponentType blue_component_, | 27 | u32 hash = is_srgb ? 1 : 0; |
| 29 | ComponentType alpha_component_, PixelFormat pixel_format_) | 28 | hash |= static_cast<u32>(red_component) << 1; |
| 30 | : texture_format{texture_format_}, pixel_format{pixel_format_}, | 29 | hash |= static_cast<u32>(green_component) << 4; |
| 31 | red_component{red_component_}, green_component{green_component_}, | 30 | hash |= static_cast<u32>(blue_component) << 7; |
| 32 | blue_component{blue_component_}, alpha_component{alpha_component_}, is_srgb{is_srgb_} {} | 31 | hash |= static_cast<u32>(alpha_component) << 10; |
| 33 | 32 | hash |= static_cast<u32>(format) << 13; | |
| 34 | TextureFormat texture_format; | 33 | return hash; |
| 35 | PixelFormat pixel_format; | 34 | } |
| 36 | ComponentType red_component; | ||
| 37 | ComponentType green_component; | ||
| 38 | ComponentType blue_component; | ||
| 39 | ComponentType alpha_component; | ||
| 40 | bool is_srgb; | ||
| 41 | }; | ||
| 42 | constexpr std::array<Table, 86> DefinitionTable = {{ | ||
| 43 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM}, | ||
| 44 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM}, | ||
| 45 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT}, | ||
| 46 | {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT}, | ||
| 47 | {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB}, | ||
| 48 | |||
| 49 | {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM}, | ||
| 50 | |||
| 51 | {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM}, | ||
| 52 | {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT}, | ||
| 53 | |||
| 54 | {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM}, | ||
| 55 | |||
| 56 | {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM}, | ||
| 57 | |||
| 58 | {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM}, | ||
| 59 | {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM}, | ||
| 60 | {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT}, | ||
| 61 | {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT}, | ||
| 62 | |||
| 63 | {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM}, | ||
| 64 | {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM}, | ||
| 65 | {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT}, | ||
| 66 | {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT}, | ||
| 67 | |||
| 68 | {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM}, | ||
| 69 | {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM}, | ||
| 70 | {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT}, | ||
| 71 | {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT}, | ||
| 72 | {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT}, | ||
| 73 | |||
| 74 | {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT}, | ||
| 75 | {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM}, | ||
| 76 | {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM}, | ||
| 77 | {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT}, | ||
| 78 | {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT}, | ||
| 79 | |||
| 80 | {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT}, | ||
| 81 | {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM}, | ||
| 82 | {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM}, | ||
| 83 | {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT}, | ||
| 84 | {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT}, | ||
| 85 | |||
| 86 | {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT}, | ||
| 87 | |||
| 88 | {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT}, | ||
| 89 | {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT}, | ||
| 90 | {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT}, | ||
| 91 | |||
| 92 | {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT}, | ||
| 93 | |||
| 94 | {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT}, | ||
| 95 | {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT}, | ||
| 96 | {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT}, | ||
| 97 | |||
| 98 | {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT}, | ||
| 99 | {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT}, | ||
| 100 | {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT}, | ||
| 101 | |||
| 102 | {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT}, | ||
| 103 | |||
| 104 | {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT}, | ||
| 105 | {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM}, | ||
| 106 | {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, | ||
| 107 | {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, | ||
| 108 | {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT}, | ||
| 109 | |||
| 110 | {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM}, | ||
| 111 | {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB}, | ||
| 112 | |||
| 113 | {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM}, | ||
| 114 | {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB}, | ||
| 115 | |||
| 116 | {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM}, | ||
| 117 | {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB}, | ||
| 118 | |||
| 119 | {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM}, | ||
| 120 | {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM}, | ||
| 121 | |||
| 122 | {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM}, | ||
| 123 | {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM}, | ||
| 124 | |||
| 125 | {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM}, | ||
| 126 | {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB}, | ||
| 127 | |||
| 128 | {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT}, | ||
| 129 | {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT}, | ||
| 130 | |||
| 131 | {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM}, | ||
| 132 | {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB}, | ||
| 133 | |||
| 134 | {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM}, | ||
| 135 | {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB}, | ||
| 136 | |||
| 137 | {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM}, | ||
| 138 | {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB}, | ||
| 139 | |||
| 140 | {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM}, | ||
| 141 | {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB}, | ||
| 142 | |||
| 143 | {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM}, | ||
| 144 | {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB}, | ||
| 145 | |||
| 146 | {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM}, | ||
| 147 | {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB}, | ||
| 148 | |||
| 149 | {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM}, | ||
| 150 | {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB}, | ||
| 151 | |||
| 152 | {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM}, | ||
| 153 | {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB}, | ||
| 154 | |||
| 155 | {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM}, | ||
| 156 | {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB}, | ||
| 157 | |||
| 158 | {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM}, | ||
| 159 | {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB}, | ||
| 160 | 35 | ||
| 161 | {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM}, | 36 | constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb = LINEAR) { |
| 162 | {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB}, | 37 | return Hash(format, component, component, component, component, is_srgb); |
| 163 | }}; | 38 | } |
| 164 | 39 | ||
| 165 | } // Anonymous namespace | 40 | } // Anonymous namespace |
| 166 | 41 | ||
| 167 | FormatLookupTable::FormatLookupTable() { | 42 | PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green, |
| 168 | table.fill(static_cast<u8>(PixelFormat::Invalid)); | 43 | ComponentType blue, ComponentType alpha, |
| 169 | 44 | bool is_srgb) noexcept { | |
| 170 | for (const auto& entry : DefinitionTable) { | 45 | switch (Hash(format, red, green, blue, alpha, is_srgb)) { |
| 171 | table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component, | 46 | case Hash(TextureFormat::A8R8G8B8, UNORM): |
| 172 | entry.green_component, entry.blue_component, entry.alpha_component)] = | 47 | return PixelFormat::A8B8G8R8_UNORM; |
| 173 | static_cast<u8>(entry.pixel_format); | 48 | case Hash(TextureFormat::A8R8G8B8, SNORM): |
| 174 | } | 49 | return PixelFormat::A8B8G8R8_SNORM; |
| 175 | } | 50 | case Hash(TextureFormat::A8R8G8B8, UINT): |
| 176 | 51 | return PixelFormat::A8B8G8R8_UINT; | |
| 177 | PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb, | 52 | case Hash(TextureFormat::A8R8G8B8, SINT): |
| 178 | ComponentType red_component, | 53 | return PixelFormat::A8B8G8R8_SINT; |
| 179 | ComponentType green_component, | 54 | case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB): |
| 180 | ComponentType blue_component, | 55 | return PixelFormat::A8B8G8R8_SRGB; |
| 181 | ComponentType alpha_component) const noexcept { | 56 | case Hash(TextureFormat::B5G6R5, UNORM): |
| 182 | const auto pixel_format = static_cast<PixelFormat>(table[CalculateIndex( | 57 | return PixelFormat::B5G6R5_UNORM; |
| 183 | format, is_srgb, red_component, green_component, blue_component, alpha_component)]); | 58 | case Hash(TextureFormat::A2B10G10R10, UNORM): |
| 184 | // [[likely]] | 59 | return PixelFormat::A2B10G10R10_UNORM; |
| 185 | if (pixel_format != PixelFormat::Invalid) { | 60 | case Hash(TextureFormat::A2B10G10R10, UINT): |
| 186 | return pixel_format; | 61 | return PixelFormat::A2B10G10R10_UINT; |
| 62 | case Hash(TextureFormat::A1B5G5R5, UNORM): | ||
| 63 | return PixelFormat::A1B5G5R5_UNORM; | ||
| 64 | case Hash(TextureFormat::A4B4G4R4, UNORM): | ||
| 65 | return PixelFormat::A4B4G4R4_UNORM; | ||
| 66 | case Hash(TextureFormat::R8, UNORM): | ||
| 67 | return PixelFormat::R8_UNORM; | ||
| 68 | case Hash(TextureFormat::R8, SNORM): | ||
| 69 | return PixelFormat::R8_SNORM; | ||
| 70 | case Hash(TextureFormat::R8, UINT): | ||
| 71 | return PixelFormat::R8_UINT; | ||
| 72 | case Hash(TextureFormat::R8, SINT): | ||
| 73 | return PixelFormat::R8_SINT; | ||
| 74 | case Hash(TextureFormat::R8G8, UNORM): | ||
| 75 | return PixelFormat::R8G8_UNORM; | ||
| 76 | case Hash(TextureFormat::R8G8, SNORM): | ||
| 77 | return PixelFormat::R8G8_SNORM; | ||
| 78 | case Hash(TextureFormat::R8G8, UINT): | ||
| 79 | return PixelFormat::R8G8_UINT; | ||
| 80 | case Hash(TextureFormat::R8G8, SINT): | ||
| 81 | return PixelFormat::R8G8_SINT; | ||
| 82 | case Hash(TextureFormat::R16G16B16A16, FLOAT): | ||
| 83 | return PixelFormat::R16G16B16A16_FLOAT; | ||
| 84 | case Hash(TextureFormat::R16G16B16A16, UNORM): | ||
| 85 | return PixelFormat::R16G16B16A16_UNORM; | ||
| 86 | case Hash(TextureFormat::R16G16B16A16, SNORM): | ||
| 87 | return PixelFormat::R16G16B16A16_SNORM; | ||
| 88 | case Hash(TextureFormat::R16G16B16A16, UINT): | ||
| 89 | return PixelFormat::R16G16B16A16_UINT; | ||
| 90 | case Hash(TextureFormat::R16G16B16A16, SINT): | ||
| 91 | return PixelFormat::R16G16B16A16_SINT; | ||
| 92 | case Hash(TextureFormat::R16G16, FLOAT): | ||
| 93 | return PixelFormat::R16G16_FLOAT; | ||
| 94 | case Hash(TextureFormat::R16G16, UNORM): | ||
| 95 | return PixelFormat::R16G16_UNORM; | ||
| 96 | case Hash(TextureFormat::R16G16, SNORM): | ||
| 97 | return PixelFormat::R16G16_SNORM; | ||
| 98 | case Hash(TextureFormat::R16G16, UINT): | ||
| 99 | return PixelFormat::R16G16_UINT; | ||
| 100 | case Hash(TextureFormat::R16G16, SINT): | ||
| 101 | return PixelFormat::R16G16_SINT; | ||
| 102 | case Hash(TextureFormat::R16, FLOAT): | ||
| 103 | return PixelFormat::R16_FLOAT; | ||
| 104 | case Hash(TextureFormat::R16, UNORM): | ||
| 105 | return PixelFormat::R16_UNORM; | ||
| 106 | case Hash(TextureFormat::R16, SNORM): | ||
| 107 | return PixelFormat::R16_SNORM; | ||
| 108 | case Hash(TextureFormat::R16, UINT): | ||
| 109 | return PixelFormat::R16_UINT; | ||
| 110 | case Hash(TextureFormat::R16, SINT): | ||
| 111 | return PixelFormat::R16_SINT; | ||
| 112 | case Hash(TextureFormat::B10G11R11, FLOAT): | ||
| 113 | return PixelFormat::B10G11R11_FLOAT; | ||
| 114 | case Hash(TextureFormat::R32G32B32A32, FLOAT): | ||
| 115 | return PixelFormat::R32G32B32A32_FLOAT; | ||
| 116 | case Hash(TextureFormat::R32G32B32A32, UINT): | ||
| 117 | return PixelFormat::R32G32B32A32_UINT; | ||
| 118 | case Hash(TextureFormat::R32G32B32A32, SINT): | ||
| 119 | return PixelFormat::R32G32B32A32_SINT; | ||
| 120 | case Hash(TextureFormat::R32G32B32, FLOAT): | ||
| 121 | return PixelFormat::R32G32B32_FLOAT; | ||
| 122 | case Hash(TextureFormat::R32G32, FLOAT): | ||
| 123 | return PixelFormat::R32G32_FLOAT; | ||
| 124 | case Hash(TextureFormat::R32G32, UINT): | ||
| 125 | return PixelFormat::R32G32_UINT; | ||
| 126 | case Hash(TextureFormat::R32G32, SINT): | ||
| 127 | return PixelFormat::R32G32_SINT; | ||
| 128 | case Hash(TextureFormat::R32, FLOAT): | ||
| 129 | return PixelFormat::R32_FLOAT; | ||
| 130 | case Hash(TextureFormat::R32, UINT): | ||
| 131 | return PixelFormat::R32_UINT; | ||
| 132 | case Hash(TextureFormat::R32, SINT): | ||
| 133 | return PixelFormat::R32_SINT; | ||
| 134 | case Hash(TextureFormat::E5B9G9R9, FLOAT): | ||
| 135 | return PixelFormat::E5B9G9R9_FLOAT; | ||
| 136 | case Hash(TextureFormat::D32, FLOAT): | ||
| 137 | return PixelFormat::D32_FLOAT; | ||
| 138 | case Hash(TextureFormat::D16, UNORM): | ||
| 139 | return PixelFormat::D16_UNORM; | ||
| 140 | case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR): | ||
| 141 | return PixelFormat::S8_UINT_D24_UNORM; | ||
| 142 | case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR): | ||
| 143 | return PixelFormat::S8_UINT_D24_UNORM; | ||
| 144 | case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): | ||
| 145 | return PixelFormat::D32_FLOAT_S8_UINT; | ||
| 146 | case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR): | ||
| 147 | return PixelFormat::BC1_RGBA_UNORM; | ||
| 148 | case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB): | ||
| 149 | return PixelFormat::BC1_RGBA_SRGB; | ||
| 150 | case Hash(TextureFormat::BC2, UNORM, LINEAR): | ||
| 151 | return PixelFormat::BC2_UNORM; | ||
| 152 | case Hash(TextureFormat::BC2, UNORM, SRGB): | ||
| 153 | return PixelFormat::BC2_SRGB; | ||
| 154 | case Hash(TextureFormat::BC3, UNORM, LINEAR): | ||
| 155 | return PixelFormat::BC3_UNORM; | ||
| 156 | case Hash(TextureFormat::BC3, UNORM, SRGB): | ||
| 157 | return PixelFormat::BC3_SRGB; | ||
| 158 | case Hash(TextureFormat::BC4, UNORM): | ||
| 159 | return PixelFormat::BC4_UNORM; | ||
| 160 | case Hash(TextureFormat::BC4, SNORM): | ||
| 161 | return PixelFormat::BC4_SNORM; | ||
| 162 | case Hash(TextureFormat::BC5, UNORM): | ||
| 163 | return PixelFormat::BC5_UNORM; | ||
| 164 | case Hash(TextureFormat::BC5, SNORM): | ||
| 165 | return PixelFormat::BC5_SNORM; | ||
| 166 | case Hash(TextureFormat::BC7, UNORM, LINEAR): | ||
| 167 | return PixelFormat::BC7_UNORM; | ||
| 168 | case Hash(TextureFormat::BC7, UNORM, SRGB): | ||
| 169 | return PixelFormat::BC7_SRGB; | ||
| 170 | case Hash(TextureFormat::BC6H_SFLOAT, FLOAT): | ||
| 171 | return PixelFormat::BC6H_SFLOAT; | ||
| 172 | case Hash(TextureFormat::BC6H_UFLOAT, FLOAT): | ||
| 173 | return PixelFormat::BC6H_UFLOAT; | ||
| 174 | case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR): | ||
| 175 | return PixelFormat::ASTC_2D_4X4_UNORM; | ||
| 176 | case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB): | ||
| 177 | return PixelFormat::ASTC_2D_4X4_SRGB; | ||
| 178 | case Hash(TextureFormat::ASTC_2D_5X4, UNORM, LINEAR): | ||
| 179 | return PixelFormat::ASTC_2D_5X4_UNORM; | ||
| 180 | case Hash(TextureFormat::ASTC_2D_5X4, UNORM, SRGB): | ||
| 181 | return PixelFormat::ASTC_2D_5X4_SRGB; | ||
| 182 | case Hash(TextureFormat::ASTC_2D_5X5, UNORM, LINEAR): | ||
| 183 | return PixelFormat::ASTC_2D_5X5_UNORM; | ||
| 184 | case Hash(TextureFormat::ASTC_2D_5X5, UNORM, SRGB): | ||
| 185 | return PixelFormat::ASTC_2D_5X5_SRGB; | ||
| 186 | case Hash(TextureFormat::ASTC_2D_8X8, UNORM, LINEAR): | ||
| 187 | return PixelFormat::ASTC_2D_8X8_UNORM; | ||
| 188 | case Hash(TextureFormat::ASTC_2D_8X8, UNORM, SRGB): | ||
| 189 | return PixelFormat::ASTC_2D_8X8_SRGB; | ||
| 190 | case Hash(TextureFormat::ASTC_2D_8X5, UNORM, LINEAR): | ||
| 191 | return PixelFormat::ASTC_2D_8X5_UNORM; | ||
| 192 | case Hash(TextureFormat::ASTC_2D_8X5, UNORM, SRGB): | ||
| 193 | return PixelFormat::ASTC_2D_8X5_SRGB; | ||
| 194 | case Hash(TextureFormat::ASTC_2D_10X8, UNORM, LINEAR): | ||
| 195 | return PixelFormat::ASTC_2D_10X8_UNORM; | ||
| 196 | case Hash(TextureFormat::ASTC_2D_10X8, UNORM, SRGB): | ||
| 197 | return PixelFormat::ASTC_2D_10X8_SRGB; | ||
| 198 | case Hash(TextureFormat::ASTC_2D_6X6, UNORM, LINEAR): | ||
| 199 | return PixelFormat::ASTC_2D_6X6_UNORM; | ||
| 200 | case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB): | ||
| 201 | return PixelFormat::ASTC_2D_6X6_SRGB; | ||
| 202 | case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR): | ||
| 203 | return PixelFormat::ASTC_2D_10X10_UNORM; | ||
| 204 | case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB): | ||
| 205 | return PixelFormat::ASTC_2D_10X10_SRGB; | ||
| 206 | case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR): | ||
| 207 | return PixelFormat::ASTC_2D_12X12_UNORM; | ||
| 208 | case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB): | ||
| 209 | return PixelFormat::ASTC_2D_12X12_SRGB; | ||
| 210 | case Hash(TextureFormat::ASTC_2D_8X6, UNORM, LINEAR): | ||
| 211 | return PixelFormat::ASTC_2D_8X6_UNORM; | ||
| 212 | case Hash(TextureFormat::ASTC_2D_8X6, UNORM, SRGB): | ||
| 213 | return PixelFormat::ASTC_2D_8X6_SRGB; | ||
| 214 | case Hash(TextureFormat::ASTC_2D_6X5, UNORM, LINEAR): | ||
| 215 | return PixelFormat::ASTC_2D_6X5_UNORM; | ||
| 216 | case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB): | ||
| 217 | return PixelFormat::ASTC_2D_6X5_SRGB; | ||
| 187 | } | 218 | } |
| 188 | UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}", | 219 | UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}", |
| 189 | static_cast<int>(format), is_srgb, static_cast<int>(red_component), | 220 | static_cast<int>(format), is_srgb, static_cast<int>(red), |
| 190 | static_cast<int>(green_component), static_cast<int>(blue_component), | 221 | static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha)); |
| 191 | static_cast<int>(alpha_component)); | ||
| 192 | return PixelFormat::A8B8G8R8_UNORM; | 222 | return PixelFormat::A8B8G8R8_UNORM; |
| 193 | } | 223 | } |
| 194 | 224 | ||
| 195 | void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component, | ||
| 196 | ComponentType green_component, ComponentType blue_component, | ||
| 197 | ComponentType alpha_component, PixelFormat pixel_format) {} | ||
| 198 | |||
| 199 | std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb, | ||
| 200 | ComponentType red_component, | ||
| 201 | ComponentType green_component, | ||
| 202 | ComponentType blue_component, | ||
| 203 | ComponentType alpha_component) noexcept { | ||
| 204 | const auto format_index = static_cast<std::size_t>(format); | ||
| 205 | const auto red_index = static_cast<std::size_t>(red_component); | ||
| 206 | const auto green_index = static_cast<std::size_t>(green_component); | ||
| 207 | const auto blue_index = static_cast<std::size_t>(blue_component); | ||
| 208 | const auto alpha_index = static_cast<std::size_t>(alpha_component); | ||
| 209 | const std::size_t srgb_index = is_srgb ? 1 : 0; | ||
| 210 | |||
| 211 | return format_index * PerFormat + | ||
| 212 | srgb_index * PerComponent * PerComponent * PerComponent * PerComponent + | ||
| 213 | alpha_index * PerComponent * PerComponent * PerComponent + | ||
| 214 | blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index; | ||
| 215 | } | ||
| 216 | |||
| 217 | } // namespace VideoCommon | 225 | } // namespace VideoCommon |
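Editorial note: the constexpr Hash replaces the old runtime lookup table with a switch whose case labels are integral constant expressions, so the compiler can emit a jump table. The packing, read directly off the shifts above (and assuming every ComponentType enumerator fits in 3 bits):

    // Bit layout of the packed key:
    //   bit  0      : is_srgb
    //   bits 1..3   : red component type
    //   bits 4..6   : green component type
    //   bits 7..9   : blue component type
    //   bits 10..12 : alpha component type
    //   bits 13..   : TextureFormat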
diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h index aa77e0a5a..729533999 100644 --- a/src/video_core/texture_cache/format_lookup_table.h +++ b/src/video_core/texture_cache/format_lookup_table.h | |||
| @@ -4,48 +4,14 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <limits> | ||
| 9 | #include "video_core/surface.h" | 7 | #include "video_core/surface.h" |
| 10 | #include "video_core/textures/texture.h" | 8 | #include "video_core/textures/texture.h" |
| 11 | 9 | ||
| 12 | namespace VideoCommon { | 10 | namespace VideoCommon { |
| 13 | 11 | ||
| 14 | class FormatLookupTable { | 12 | VideoCore::Surface::PixelFormat PixelFormatFromTextureInfo( |
| 15 | public: | 13 | Tegra::Texture::TextureFormat format, Tegra::Texture::ComponentType red_component, |
| 16 | explicit FormatLookupTable(); | 14 | Tegra::Texture::ComponentType green_component, Tegra::Texture::ComponentType blue_component, |
| 17 | 15 | Tegra::Texture::ComponentType alpha_component, bool is_srgb) noexcept; | |
| 18 | VideoCore::Surface::PixelFormat GetPixelFormat( | ||
| 19 | Tegra::Texture::TextureFormat format, bool is_srgb, | ||
| 20 | Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component, | ||
| 21 | Tegra::Texture::ComponentType blue_component, | ||
| 22 | Tegra::Texture::ComponentType alpha_component) const noexcept; | ||
| 23 | |||
| 24 | private: | ||
| 25 | static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max()); | ||
| 26 | |||
| 27 | static constexpr std::size_t NumTextureFormats = 128; | ||
| 28 | |||
| 29 | static constexpr std::size_t PerComponent = 8; | ||
| 30 | static constexpr std::size_t PerComponents2 = PerComponent * PerComponent; | ||
| 31 | static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent; | ||
| 32 | static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent; | ||
| 33 | static constexpr std::size_t PerFormat = PerComponents4 * 2; | ||
| 34 | |||
| 35 | static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb, | ||
| 36 | Tegra::Texture::ComponentType red_component, | ||
| 37 | Tegra::Texture::ComponentType green_component, | ||
| 38 | Tegra::Texture::ComponentType blue_component, | ||
| 39 | Tegra::Texture::ComponentType alpha_component) noexcept; | ||
| 40 | |||
| 41 | void Set(Tegra::Texture::TextureFormat format, bool is_srgb, | ||
| 42 | Tegra::Texture::ComponentType red_component, | ||
| 43 | Tegra::Texture::ComponentType green_component, | ||
| 44 | Tegra::Texture::ComponentType blue_component, | ||
| 45 | Tegra::Texture::ComponentType alpha_component, | ||
| 46 | VideoCore::Surface::PixelFormat pixel_format); | ||
| 47 | |||
| 48 | std::array<u8, NumTextureFormats * PerFormat> table; | ||
| 49 | }; | ||
| 50 | 16 | ||
| 51 | } // namespace VideoCommon | 17 | } // namespace VideoCommon |
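Editorial note: callers now translate a texture descriptor's fields directly instead of holding a FormatLookupTable instance. A hypothetical call mirroring how a TIC entry would be translated, with the result following from the A8R8G8B8/UNORM case above:

    using namespace Tegra::Texture;
    const auto format = VideoCommon::PixelFormatFromTextureInfo(
        TextureFormat::A8R8G8B8, ComponentType::UNORM, ComponentType::UNORM,
        ComponentType::UNORM, ComponentType::UNORM, /*is_srgb=*/false);
    // format == VideoCore::Surface::PixelFormat::A8B8G8R8_UNORM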
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp new file mode 100644 index 000000000..d10ba4ccd --- /dev/null +++ b/src/video_core/texture_cache/formatter.cpp | |||
| @@ -0,0 +1,95 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <string> | ||
| 7 | |||
| 8 | #include "video_core/texture_cache/formatter.h" | ||
| 9 | #include "video_core/texture_cache/image_base.h" | ||
| 10 | #include "video_core/texture_cache/image_info.h" | ||
| 11 | #include "video_core/texture_cache/image_view_base.h" | ||
| 12 | #include "video_core/texture_cache/render_targets.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | std::string Name(const ImageBase& image) { | ||
| 17 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 18 | const ImageInfo& info = image.info; | ||
| 19 | const u32 width = info.size.width; | ||
| 20 | const u32 height = info.size.height; | ||
| 21 | const u32 depth = info.size.depth; | ||
| 22 | const u32 num_layers = image.info.resources.layers; | ||
| 23 | const u32 num_levels = image.info.resources.levels; | ||
| 24 | std::string resource; | ||
| 25 | if (num_layers > 1) { | ||
| 26 | resource += fmt::format(":L{}", num_layers); | ||
| 27 | } | ||
| 28 | if (num_levels > 1) { | ||
| 29 | resource += fmt::format(":M{}", num_levels); | ||
| 30 | } | ||
| 31 | switch (image.info.type) { | ||
| 32 | case ImageType::e1D: | ||
| 33 | return fmt::format("Image 1D 0x{:x} {}{}", gpu_addr, width, resource); | ||
| 34 | case ImageType::e2D: | ||
| 35 | return fmt::format("Image 2D 0x{:x} {}x{}{}", gpu_addr, width, height, resource); | ||
| 36 | case ImageType::e3D: | ||
| 37 | return fmt::format("Image 2D 0x{:x} {}x{}x{}{}", gpu_addr, width, height, depth, resource); | ||
| 38 | case ImageType::Linear: | ||
| 39 | return fmt::format("Image Linear 0x{:x} {}x{}", gpu_addr, width, height); | ||
| 40 | case ImageType::Buffer: | ||
| 41 | return fmt::format("Buffer 0x{:x} {}", image.gpu_addr, image.info.size.width); | ||
| 42 | } | ||
| 43 | return "Invalid"; | ||
| 44 | } | ||
| 45 | |||
| 46 | std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) { | ||
| 47 | const u32 width = image_view.size.width; | ||
| 48 | const u32 height = image_view.size.height; | ||
| 49 | const u32 depth = image_view.size.depth; | ||
| 50 | const u32 num_levels = image_view.range.extent.levels; | ||
| 51 | const u32 num_layers = image_view.range.extent.layers; | ||
| 52 | |||
| 53 | const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; | ||
| 54 | switch (type.value_or(image_view.type)) { | ||
| 55 | case ImageViewType::e1D: | ||
| 56 | return fmt::format("ImageView 1D {}{}", width, level); | ||
| 57 | case ImageViewType::e2D: | ||
| 58 | return fmt::format("ImageView 2D {}x{}{}", width, height, level); | ||
| 59 | case ImageViewType::Cube: | ||
| 60 | return fmt::format("ImageView Cube {}x{}{}", width, height, level); | ||
| 61 | case ImageViewType::e3D: | ||
| 62 | return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level); | ||
| 63 | case ImageViewType::e1DArray: | ||
| 64 | return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers); | ||
| 65 | case ImageViewType::e2DArray: | ||
| 66 | return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers); | ||
| 67 | case ImageViewType::CubeArray: | ||
| 68 | return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers); | ||
| 69 | case ImageViewType::Rect: | ||
| 70 | return fmt::format("ImageView Rect {}x{}{}", width, height, level); | ||
| 71 | case ImageViewType::Buffer: | ||
| 72 | return fmt::format("BufferView {}", width); | ||
| 73 | } | ||
| 74 | return "Invalid"; | ||
| 75 | } | ||
| 76 | |||
| 77 | std::string Name(const RenderTargets& render_targets) { | ||
| 78 | std::string_view debug_prefix; | ||
| 79 | const auto num_color = std::ranges::count_if( | ||
| 80 | render_targets.color_buffer_ids, [](ImageViewId id) { return static_cast<bool>(id); }); | ||
| 81 | if (render_targets.depth_buffer_id) { | ||
| 82 | debug_prefix = num_color > 0 ? "R" : "Z"; | ||
| 83 | } else { | ||
| 84 | debug_prefix = num_color > 0 ? "C" : "X"; | ||
| 85 | } | ||
| 86 | const Extent2D size = render_targets.size; | ||
| 87 | if (num_color > 0) { | ||
| 88 | return fmt::format("Framebuffer {}{} {}x{}", debug_prefix, num_color, size.width, | ||
| 89 | size.height); | ||
| 90 | } else { | ||
| 91 | return fmt::format("Framebuffer {} {}x{}", debug_prefix, size.width, size.height); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | } // namespace VideoCommon | ||
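Editorial note: these names feed debug labels for graphics debuggers. Example outputs, derived from the format strings above (inputs are illustrative):

    // Image 2D at 0x1000, 1024x768, 5 mip levels -> "Image 2D 0x1000 1024x768:M5"
    // Cube array view, 64x64, 4 layers           -> "ImageView CubeArray 64x64|4"
    // Two color targets, no depth, 1920x1080     -> "Framebuffer C2 1920x1080"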
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h new file mode 100644 index 000000000..a48413983 --- /dev/null +++ b/src/video_core/texture_cache/formatter.h | |||
| @@ -0,0 +1,263 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/texture_cache/types.h" | ||
| 13 | |||
| 14 | template <> | ||
| 15 | struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::string_view> { | ||
| 16 | template <typename FormatContext> | ||
| 17 | auto format(VideoCore::Surface::PixelFormat format, FormatContext& ctx) { | ||
| 18 | using VideoCore::Surface::PixelFormat; | ||
| 19 | const string_view name = [format] { | ||
| 20 | switch (format) { | ||
| 21 | case PixelFormat::A8B8G8R8_UNORM: | ||
| 22 | return "A8B8G8R8_UNORM"; | ||
| 23 | case PixelFormat::A8B8G8R8_SNORM: | ||
| 24 | return "A8B8G8R8_SNORM"; | ||
| 25 | case PixelFormat::A8B8G8R8_SINT: | ||
| 26 | return "A8B8G8R8_SINT"; | ||
| 27 | case PixelFormat::A8B8G8R8_UINT: | ||
| 28 | return "A8B8G8R8_UINT"; | ||
| 29 | case PixelFormat::R5G6B5_UNORM: | ||
| 30 | return "R5G6B5_UNORM"; | ||
| 31 | case PixelFormat::B5G6R5_UNORM: | ||
| 32 | return "B5G6R5_UNORM"; | ||
| 33 | case PixelFormat::A1R5G5B5_UNORM: | ||
| 34 | return "A1R5G5B5_UNORM"; | ||
| 35 | case PixelFormat::A2B10G10R10_UNORM: | ||
| 36 | return "A2B10G10R10_UNORM"; | ||
| 37 | case PixelFormat::A2B10G10R10_UINT: | ||
| 38 | return "A2B10G10R10_UINT"; | ||
| 39 | case PixelFormat::A1B5G5R5_UNORM: | ||
| 40 | return "A1B5G5R5_UNORM"; | ||
| 41 | case PixelFormat::R8_UNORM: | ||
| 42 | return "R8_UNORM"; | ||
| 43 | case PixelFormat::R8_SNORM: | ||
| 44 | return "R8_SNORM"; | ||
| 45 | case PixelFormat::R8_SINT: | ||
| 46 | return "R8_SINT"; | ||
| 47 | case PixelFormat::R8_UINT: | ||
| 48 | return "R8_UINT"; | ||
| 49 | case PixelFormat::R16G16B16A16_FLOAT: | ||
| 50 | return "R16G16B16A16_FLOAT"; | ||
| 51 | case PixelFormat::R16G16B16A16_UNORM: | ||
| 52 | return "R16G16B16A16_UNORM"; | ||
| 53 | case PixelFormat::R16G16B16A16_SNORM: | ||
| 54 | return "R16G16B16A16_SNORM"; | ||
| 55 | case PixelFormat::R16G16B16A16_SINT: | ||
| 56 | return "R16G16B16A16_SINT"; | ||
| 57 | case PixelFormat::R16G16B16A16_UINT: | ||
| 58 | return "R16G16B16A16_UINT"; | ||
| 59 | case PixelFormat::B10G11R11_FLOAT: | ||
| 60 | return "B10G11R11_FLOAT"; | ||
| 61 | case PixelFormat::R32G32B32A32_UINT: | ||
| 62 | return "R32G32B32A32_UINT"; | ||
| 63 | case PixelFormat::BC1_RGBA_UNORM: | ||
| 64 | return "BC1_RGBA_UNORM"; | ||
| 65 | case PixelFormat::BC2_UNORM: | ||
| 66 | return "BC2_UNORM"; | ||
| 67 | case PixelFormat::BC3_UNORM: | ||
| 68 | return "BC3_UNORM"; | ||
| 69 | case PixelFormat::BC4_UNORM: | ||
| 70 | return "BC4_UNORM"; | ||
| 71 | case PixelFormat::BC4_SNORM: | ||
| 72 | return "BC4_SNORM"; | ||
| 73 | case PixelFormat::BC5_UNORM: | ||
| 74 | return "BC5_UNORM"; | ||
| 75 | case PixelFormat::BC5_SNORM: | ||
| 76 | return "BC5_SNORM"; | ||
| 77 | case PixelFormat::BC7_UNORM: | ||
| 78 | return "BC7_UNORM"; | ||
| 79 | case PixelFormat::BC6H_UFLOAT: | ||
| 80 | return "BC6H_UFLOAT"; | ||
| 81 | case PixelFormat::BC6H_SFLOAT: | ||
| 82 | return "BC6H_SFLOAT"; | ||
| 83 | case PixelFormat::ASTC_2D_4X4_UNORM: | ||
| 84 | return "ASTC_2D_4X4_UNORM"; | ||
| 85 | case PixelFormat::B8G8R8A8_UNORM: | ||
| 86 | return "B8G8R8A8_UNORM"; | ||
| 87 | case PixelFormat::R32G32B32A32_FLOAT: | ||
| 88 | return "R32G32B32A32_FLOAT"; | ||
| 89 | case PixelFormat::R32G32B32A32_SINT: | ||
| 90 | return "R32G32B32A32_SINT"; | ||
| 91 | case PixelFormat::R32G32_FLOAT: | ||
| 92 | return "R32G32_FLOAT"; | ||
| 93 | case PixelFormat::R32G32_SINT: | ||
| 94 | return "R32G32_SINT"; | ||
| 95 | case PixelFormat::R32_FLOAT: | ||
| 96 | return "R32_FLOAT"; | ||
| 97 | case PixelFormat::R16_FLOAT: | ||
| 98 | return "R16_FLOAT"; | ||
| 99 | case PixelFormat::R16_UNORM: | ||
| 100 | return "R16_UNORM"; | ||
| 101 | case PixelFormat::R16_SNORM: | ||
| 102 | return "R16_SNORM"; | ||
| 103 | case PixelFormat::R16_UINT: | ||
| 104 | return "R16_UINT"; | ||
| 105 | case PixelFormat::R16_SINT: | ||
| 106 | return "R16_SINT"; | ||
| 107 | case PixelFormat::R16G16_UNORM: | ||
| 108 | return "R16G16_UNORM"; | ||
| 109 | case PixelFormat::R16G16_FLOAT: | ||
| 110 | return "R16G16_FLOAT"; | ||
| 111 | case PixelFormat::R16G16_UINT: | ||
| 112 | return "R16G16_UINT"; | ||
| 113 | case PixelFormat::R16G16_SINT: | ||
| 114 | return "R16G16_SINT"; | ||
| 115 | case PixelFormat::R16G16_SNORM: | ||
| 116 | return "R16G16_SNORM"; | ||
| 117 | case PixelFormat::R32G32B32_FLOAT: | ||
| 118 | return "R32G32B32_FLOAT"; | ||
| 119 | case PixelFormat::A8B8G8R8_SRGB: | ||
| 120 | return "A8B8G8R8_SRGB"; | ||
| 121 | case PixelFormat::R8G8_UNORM: | ||
| 122 | return "R8G8_UNORM"; | ||
| 123 | case PixelFormat::R8G8_SNORM: | ||
| 124 | return "R8G8_SNORM"; | ||
| 125 | case PixelFormat::R8G8_SINT: | ||
| 126 | return "R8G8_SINT"; | ||
| 127 | case PixelFormat::R8G8_UINT: | ||
| 128 | return "R8G8_UINT"; | ||
| 129 | case PixelFormat::R32G32_UINT: | ||
| 130 | return "R32G32_UINT"; | ||
| 131 | case PixelFormat::R16G16B16X16_FLOAT: | ||
| 132 | return "R16G16B16X16_FLOAT"; | ||
| 133 | case PixelFormat::R32_UINT: | ||
| 134 | return "R32_UINT"; | ||
| 135 | case PixelFormat::R32_SINT: | ||
| 136 | return "R32_SINT"; | ||
| 137 | case PixelFormat::ASTC_2D_8X8_UNORM: | ||
| 138 | return "ASTC_2D_8X8_UNORM"; | ||
| 139 | case PixelFormat::ASTC_2D_8X5_UNORM: | ||
| 140 | return "ASTC_2D_8X5_UNORM"; | ||
| 141 | case PixelFormat::ASTC_2D_5X4_UNORM: | ||
| 142 | return "ASTC_2D_5X4_UNORM"; | ||
| 143 | case PixelFormat::B8G8R8A8_SRGB: | ||
| 144 | return "B8G8R8A8_SRGB"; | ||
| 145 | case PixelFormat::BC1_RGBA_SRGB: | ||
| 146 | return "BC1_RGBA_SRGB"; | ||
| 147 | case PixelFormat::BC2_SRGB: | ||
| 148 | return "BC2_SRGB"; | ||
| 149 | case PixelFormat::BC3_SRGB: | ||
| 150 | return "BC3_SRGB"; | ||
| 151 | case PixelFormat::BC7_SRGB: | ||
| 152 | return "BC7_SRGB"; | ||
| 153 | case PixelFormat::A4B4G4R4_UNORM: | ||
| 154 | return "A4B4G4R4_UNORM"; | ||
| 155 | case PixelFormat::ASTC_2D_4X4_SRGB: | ||
| 156 | return "ASTC_2D_4X4_SRGB"; | ||
| 157 | case PixelFormat::ASTC_2D_8X8_SRGB: | ||
| 158 | return "ASTC_2D_8X8_SRGB"; | ||
| 159 | case PixelFormat::ASTC_2D_8X5_SRGB: | ||
| 160 | return "ASTC_2D_8X5_SRGB"; | ||
| 161 | case PixelFormat::ASTC_2D_5X4_SRGB: | ||
| 162 | return "ASTC_2D_5X4_SRGB"; | ||
| 163 | case PixelFormat::ASTC_2D_5X5_UNORM: | ||
| 164 | return "ASTC_2D_5X5_UNORM"; | ||
| 165 | case PixelFormat::ASTC_2D_5X5_SRGB: | ||
| 166 | return "ASTC_2D_5X5_SRGB"; | ||
| 167 | case PixelFormat::ASTC_2D_10X8_UNORM: | ||
| 168 | return "ASTC_2D_10X8_UNORM"; | ||
| 169 | case PixelFormat::ASTC_2D_10X8_SRGB: | ||
| 170 | return "ASTC_2D_10X8_SRGB"; | ||
| 171 | case PixelFormat::ASTC_2D_6X6_UNORM: | ||
| 172 | return "ASTC_2D_6X6_UNORM"; | ||
| 173 | case PixelFormat::ASTC_2D_6X6_SRGB: | ||
| 174 | return "ASTC_2D_6X6_SRGB"; | ||
| 175 | case PixelFormat::ASTC_2D_10X10_UNORM: | ||
| 176 | return "ASTC_2D_10X10_UNORM"; | ||
| 177 | case PixelFormat::ASTC_2D_10X10_SRGB: | ||
| 178 | return "ASTC_2D_10X10_SRGB"; | ||
| 179 | case PixelFormat::ASTC_2D_12X12_UNORM: | ||
| 180 | return "ASTC_2D_12X12_UNORM"; | ||
| 181 | case PixelFormat::ASTC_2D_12X12_SRGB: | ||
| 182 | return "ASTC_2D_12X12_SRGB"; | ||
| 183 | case PixelFormat::ASTC_2D_8X6_UNORM: | ||
| 184 | return "ASTC_2D_8X6_UNORM"; | ||
| 185 | case PixelFormat::ASTC_2D_8X6_SRGB: | ||
| 186 | return "ASTC_2D_8X6_SRGB"; | ||
| 187 | case PixelFormat::ASTC_2D_6X5_UNORM: | ||
| 188 | return "ASTC_2D_6X5_UNORM"; | ||
| 189 | case PixelFormat::ASTC_2D_6X5_SRGB: | ||
| 190 | return "ASTC_2D_6X5_SRGB"; | ||
| 191 | case PixelFormat::E5B9G9R9_FLOAT: | ||
| 192 | return "E5B9G9R9_FLOAT"; | ||
| 193 | case PixelFormat::D32_FLOAT: | ||
| 194 | return "D32_FLOAT"; | ||
| 195 | case PixelFormat::D16_UNORM: | ||
| 196 | return "D16_UNORM"; | ||
| 197 | case PixelFormat::D24_UNORM_S8_UINT: | ||
| 198 | return "D24_UNORM_S8_UINT"; | ||
| 199 | case PixelFormat::S8_UINT_D24_UNORM: | ||
| 200 | return "S8_UINT_D24_UNORM"; | ||
| 201 | case PixelFormat::D32_FLOAT_S8_UINT: | ||
| 202 | return "D32_FLOAT_S8_UINT"; | ||
| 203 | case PixelFormat::MaxDepthStencilFormat: | ||
| 204 | case PixelFormat::Invalid: | ||
| 205 | return "Invalid"; | ||
| 206 | } | ||
| 207 | return "Invalid"; | ||
| 208 | }(); | ||
| 209 | return formatter<string_view>::format(name, ctx); | ||
| 210 | } | ||
| 211 | }; | ||
| 212 | |||
| 213 | template <> | ||
| 214 | struct fmt::formatter<VideoCommon::ImageType> : fmt::formatter<fmt::string_view> { | ||
| 215 | template <typename FormatContext> | ||
| 216 | auto format(VideoCommon::ImageType type, FormatContext& ctx) { | ||
| 217 | const string_view name = [type] { | ||
| 218 | using VideoCommon::ImageType; | ||
| 219 | switch (type) { | ||
| 220 | case ImageType::e1D: | ||
| 221 | return "1D"; | ||
| 222 | case ImageType::e2D: | ||
| 223 | return "2D"; | ||
| 224 | case ImageType::e3D: | ||
| 225 | return "3D"; | ||
| 226 | case ImageType::Linear: | ||
| 227 | return "Linear"; | ||
| 228 | case ImageType::Buffer: | ||
| 229 | return "Buffer"; | ||
| 230 | } | ||
| 231 | return "Invalid"; | ||
| 232 | }(); | ||
| 233 | return formatter<string_view>::format(name, ctx); | ||
| 234 | } | ||
| 235 | }; | ||
| 236 | |||
| 237 | template <> | ||
| 238 | struct fmt::formatter<VideoCommon::Extent3D> { | ||
| 239 | constexpr auto parse(fmt::format_parse_context& ctx) { | ||
| 240 | return ctx.begin(); | ||
| 241 | } | ||
| 242 | |||
| 243 | template <typename FormatContext> | ||
| 244 | auto format(const VideoCommon::Extent3D& extent, FormatContext& ctx) { | ||
| 245 | return fmt::format_to(ctx.out(), "{{{}, {}, {}}}", extent.width, extent.height, | ||
| 246 | extent.depth); | ||
| 247 | } | ||
| 248 | }; | ||
| 249 | |||
| 250 | namespace VideoCommon { | ||
| 251 | |||
| 252 | struct ImageBase; | ||
| 253 | struct ImageViewBase; | ||
| 254 | struct RenderTargets; | ||
| 255 | |||
| 256 | [[nodiscard]] std::string Name(const ImageBase& image); | ||
| 257 | |||
| 258 | [[nodiscard]] std::string Name(const ImageViewBase& image_view, | ||
| 259 | std::optional<ImageViewType> type = std::nullopt); | ||
| 260 | |||
| 261 | [[nodiscard]] std::string Name(const RenderTargets& render_targets); | ||
| 262 | |||
| 263 | } // namespace VideoCommon | ||
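[Editor's note] These formatter specializations are what let the texture cache interpolate PixelFormat, ImageType, and Extent3D values directly into log messages. A minimal usage sketch, assuming formatter.h and the fmt version the project pins are on the include path (the values are illustrative):

    #include <string>
    #include <fmt/format.h>
    #include "video_core/texture_cache/formatter.h"

    std::string DescribeSurface() {
        using VideoCore::Surface::PixelFormat;
        const VideoCommon::Extent3D size{.width = 1920, .height = 1080, .depth = 1};
        // Both arguments hit the specializations above and render as
        // "A8B8G8R8_UNORM {1920, 1080, 1}"
        return fmt::format("{} {}", PixelFormat::A8B8G8R8_UNORM, size);
    }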
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp new file mode 100644 index 000000000..448a05fcc --- /dev/null +++ b/src/video_core/texture_cache/image_base.cpp | |||
| @@ -0,0 +1,216 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <optional> | ||
| 7 | #include <utility> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/texture_cache/formatter.h" | ||
| 13 | #include "video_core/texture_cache/image_base.h" | ||
| 14 | #include "video_core/texture_cache/image_view_info.h" | ||
| 15 | #include "video_core/texture_cache/util.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | using VideoCore::Surface::DefaultBlockHeight; | ||
| 20 | using VideoCore::Surface::DefaultBlockWidth; | ||
| 21 | |||
| 22 | namespace { | ||
| 23 | /// Returns the base layer and mip level offset | ||
| 24 | [[nodiscard]] std::pair<s32, s32> LayerMipOffset(s32 diff, u32 layer_stride) { | ||
| 25 | if (layer_stride == 0) { | ||
| 26 | return {0, diff}; | ||
| 27 | } else { | ||
| 28 | return {diff / layer_stride, diff % layer_stride}; | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | [[nodiscard]] bool ValidateLayers(const SubresourceLayers& layers, const ImageInfo& info) { | ||
| 33 | return layers.base_level < info.resources.levels && | ||
| 34 | layers.base_layer + layers.num_layers <= info.resources.layers; | ||
| 35 | } | ||
| 36 | |||
| 37 | [[nodiscard]] bool ValidateCopy(const ImageCopy& copy, const ImageInfo& dst, const ImageInfo& src) { | ||
| 38 | const Extent3D src_size = MipSize(src.size, copy.src_subresource.base_level); | ||
| 39 | const Extent3D dst_size = MipSize(dst.size, copy.dst_subresource.base_level); | ||
| 40 | if (!ValidateLayers(copy.src_subresource, src)) { | ||
| 41 | return false; | ||
| 42 | } | ||
| 43 | if (!ValidateLayers(copy.dst_subresource, dst)) { | ||
| 44 | return false; | ||
| 45 | } | ||
| 46 | if (copy.src_offset.x + copy.extent.width > src_size.width || | ||
| 47 | copy.src_offset.y + copy.extent.height > src_size.height || | ||
| 48 | copy.src_offset.z + copy.extent.depth > src_size.depth) { | ||
| 49 | return false; | ||
| 50 | } | ||
| 51 | if (copy.dst_offset.x + copy.extent.width > dst_size.width || | ||
| 52 | copy.dst_offset.y + copy.extent.height > dst_size.height || | ||
| 53 | copy.dst_offset.z + copy.extent.depth > dst_size.depth) { | ||
| 54 | return false; | ||
| 55 | } | ||
| 56 | return true; | ||
| 57 | } | ||
| 58 | } // Anonymous namespace | ||
| 59 | |||
| 60 | ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) | ||
| 61 | : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, | ||
| 62 | unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, | ||
| 63 | converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_}, | ||
| 64 | cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes}, | ||
| 65 | mip_level_offsets{CalculateMipLevelOffsets(info)} { | ||
| 66 | if (info.type == ImageType::e3D) { | ||
| 67 | slice_offsets = CalculateSliceOffsets(info); | ||
| 68 | slice_subresources = CalculateSliceSubresources(info); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { | ||
| 73 | if (other_addr < gpu_addr) { | ||
| 74 | // Subresource address can't be lower than the base | ||
| 75 | return std::nullopt; | ||
| 76 | } | ||
| 77 | const u32 diff = static_cast<u32>(other_addr - gpu_addr); | ||
| 78 | if (diff > guest_size_bytes) { | ||
| 79 | // This can happen when two CPU addresses are used for different GPU addresses | ||
| 80 | return std::nullopt; | ||
| 81 | } | ||
| 82 | if (info.type != ImageType::e3D) { | ||
| 83 | const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride); | ||
| 84 | const auto end = mip_level_offsets.begin() + info.resources.levels; | ||
| 85 | const auto it = std::find(mip_level_offsets.begin(), end, mip_offset); | ||
| 86 | if (layer > info.resources.layers || it == end) { | ||
| 87 | return std::nullopt; | ||
| 88 | } | ||
| 89 | return SubresourceBase{ | ||
| 90 | .level = static_cast<s32>(std::distance(mip_level_offsets.begin(), it)), | ||
| 91 | .layer = layer, | ||
| 92 | }; | ||
| 93 | } else { | ||
| 94 | // TODO: Consider using binary_search after a threshold | ||
| 95 | const auto it = std::ranges::find(slice_offsets, diff); | ||
| 96 | if (it == slice_offsets.cend()) { | ||
| 97 | return std::nullopt; | ||
| 98 | } | ||
| 99 | return slice_subresources[std::distance(slice_offsets.begin(), it)]; | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | ImageViewId ImageBase::FindView(const ImageViewInfo& view_info) const noexcept { | ||
| 104 | const auto it = std::ranges::find(image_view_infos, view_info); | ||
| 105 | if (it == image_view_infos.end()) { | ||
| 106 | return ImageViewId{}; | ||
| 107 | } | ||
| 108 | return image_view_ids[std::distance(image_view_infos.begin(), it)]; | ||
| 109 | } | ||
| 110 | |||
| 111 | void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id) { | ||
| 112 | image_view_infos.push_back(view_info); | ||
| 113 | image_view_ids.push_back(image_view_id); | ||
| 114 | } | ||
| 115 | |||
| 116 | void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { | ||
| 117 | static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; | ||
| 118 | ASSERT(lhs.info.type == rhs.info.type); | ||
| 119 | std::optional<SubresourceBase> base; | ||
| 120 | if (lhs.info.type == ImageType::Linear) { | ||
| 121 | base = SubresourceBase{.level = 0, .layer = 0}; | ||
| 122 | } else { | ||
| 123 | base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS); | ||
| 124 | } | ||
| 125 | if (!base) { | ||
| 126 | LOG_ERROR(HW_GPU, "Image alias should have been flipped"); | ||
| 127 | return; | ||
| 128 | } | ||
| 129 | const PixelFormat lhs_format = lhs.info.format; | ||
| 130 | const PixelFormat rhs_format = rhs.info.format; | ||
| 131 | const Extent2D lhs_block{ | ||
| 132 | .width = DefaultBlockWidth(lhs_format), | ||
| 133 | .height = DefaultBlockHeight(lhs_format), | ||
| 134 | }; | ||
| 135 | const Extent2D rhs_block{ | ||
| 136 | .width = DefaultBlockWidth(rhs_format), | ||
| 137 | .height = DefaultBlockHeight(rhs_format), | ||
| 138 | }; | ||
| 139 | const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1; | ||
| 140 | const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1; | ||
| 141 | if (is_lhs_compressed && is_rhs_compressed) { | ||
| 142 | LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented"); | ||
| 143 | return; | ||
| 144 | } | ||
| 145 | const s32 lhs_mips = lhs.info.resources.levels; | ||
| 146 | const s32 rhs_mips = rhs.info.resources.levels; | ||
| 147 | const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips); | ||
| 148 | AliasedImage lhs_alias; | ||
| 149 | AliasedImage rhs_alias; | ||
| 150 | lhs_alias.id = rhs_id; | ||
| 151 | rhs_alias.id = lhs_id; | ||
| 152 | lhs_alias.copies.reserve(num_mips); | ||
| 153 | rhs_alias.copies.reserve(num_mips); | ||
| 154 | for (s32 mip_level = 0; mip_level < num_mips; ++mip_level) { | ||
| 155 | Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level); | ||
| 156 | Extent3D rhs_size = MipSize(rhs.info.size, mip_level); | ||
| 157 | if (is_lhs_compressed) { | ||
| 158 | lhs_size.width /= lhs_block.width; | ||
| 159 | lhs_size.height /= lhs_block.height; | ||
| 160 | } | ||
| 161 | if (is_rhs_compressed) { | ||
| 162 | rhs_size.width /= rhs_block.width; | ||
| 163 | rhs_size.height /= rhs_block.height; | ||
| 164 | } | ||
| 165 | const Extent3D copy_size{ | ||
| 166 | .width = std::min(lhs_size.width, rhs_size.width), | ||
| 167 | .height = std::min(lhs_size.height, rhs_size.height), | ||
| 168 | .depth = std::min(lhs_size.depth, rhs_size.depth), | ||
| 169 | }; | ||
| 170 | if (copy_size.width == 0 || copy_size.height == 0) { | ||
| 171 | LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased."); | ||
| 172 | continue; | ||
| 173 | } | ||
| 174 | const bool is_lhs_3d = lhs.info.type == ImageType::e3D; | ||
| 175 | const bool is_rhs_3d = rhs.info.type == ImageType::e3D; | ||
| 176 | const Offset3D lhs_offset{0, 0, 0}; | ||
| 177 | const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0}; | ||
| 178 | const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer; | ||
| 179 | const s32 rhs_layers = is_rhs_3d ? 1 : rhs.info.resources.layers; | ||
| 180 | const s32 num_layers = std::min(lhs_layers, rhs_layers); | ||
| 181 | const SubresourceLayers lhs_subresource{ | ||
| 182 | .base_level = mip_level, | ||
| 183 | .base_layer = 0, | ||
| 184 | .num_layers = num_layers, | ||
| 185 | }; | ||
| 186 | const SubresourceLayers rhs_subresource{ | ||
| 187 | .base_level = base->level + mip_level, | ||
| 188 | .base_layer = is_rhs_3d ? 0 : base->layer, | ||
| 189 | .num_layers = num_layers, | ||
| 190 | }; | ||
| 191 | [[maybe_unused]] const ImageCopy& to_lhs_copy = lhs_alias.copies.emplace_back(ImageCopy{ | ||
| 192 | .src_subresource = lhs_subresource, | ||
| 193 | .dst_subresource = rhs_subresource, | ||
| 194 | .src_offset = lhs_offset, | ||
| 195 | .dst_offset = rhs_offset, | ||
| 196 | .extent = copy_size, | ||
| 197 | }); | ||
| 198 | [[maybe_unused]] const ImageCopy& to_rhs_copy = rhs_alias.copies.emplace_back(ImageCopy{ | ||
| 199 | .src_subresource = rhs_subresource, | ||
| 200 | .dst_subresource = lhs_subresource, | ||
| 201 | .src_offset = rhs_offset, | ||
| 202 | .dst_offset = lhs_offset, | ||
| 203 | .extent = copy_size, | ||
| 204 | }); | ||
| 205 | ASSERT_MSG(ValidateCopy(to_lhs_copy, lhs.info, rhs.info), "Invalid RHS to LHS copy"); | ||
| 206 | ASSERT_MSG(ValidateCopy(to_rhs_copy, rhs.info, lhs.info), "Invalid LHS to RHS copy"); | ||
| 207 | } | ||
| 208 | ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty()); | ||
| 209 | if (lhs_alias.copies.empty()) { | ||
| 210 | return; | ||
| 211 | } | ||
| 212 | lhs.aliased_images.push_back(std::move(lhs_alias)); | ||
| 213 | rhs.aliased_images.push_back(std::move(rhs_alias)); | ||
| 214 | } | ||
| 215 | |||
| 216 | } // namespace VideoCommon | ||
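[Editor's note] TryFindBase above inverts the guest memory layout: outside of 3D images a subresource starts at gpu_addr + layer * layer_stride + mip_level_offsets[level], so one division plus a search over the level offsets recovers (layer, level). A standalone sketch of that decomposition, with simplified types and an illustrative three-level offset table:

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <optional>

    struct SubresourceBase {
        int32_t level;
        int32_t layer;
    };

    // Split a byte offset from the image base into (layer, level), mirroring
    // the non-3D path of TryFindBase. A zero layer stride means "not layered".
    std::optional<SubresourceBase> FindBase(uint32_t diff, uint32_t layer_stride,
                                            const std::array<uint32_t, 3>& level_offsets) {
        const uint32_t layer = layer_stride == 0 ? 0 : diff / layer_stride;
        const uint32_t mip_offset = layer_stride == 0 ? diff : diff % layer_stride;
        for (std::size_t level = 0; level < level_offsets.size(); ++level) {
            if (level_offsets[level] == mip_offset) {
                return SubresourceBase{static_cast<int32_t>(level), static_cast<int32_t>(layer)};
            }
        }
        return std::nullopt; // The address is not a mip boundary of this image
    }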
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h new file mode 100644 index 000000000..b7f3b7e43 --- /dev/null +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -0,0 +1,83 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/common_funcs.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/texture_cache/image_info.h" | ||
| 14 | #include "video_core/texture_cache/image_view_info.h" | ||
| 15 | #include "video_core/texture_cache/types.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | enum class ImageFlagBits : u32 { | ||
| 20 | AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU | ||
| 21 | Converted = 1 << 1, ///< Guest format is not natively supported and has to be converted | ||
| 22 | CpuModified = 1 << 2, ///< Contents have been modified from the CPU | ||
| 23 | GpuModified = 1 << 3, ///< Contents have been modified from the GPU | ||
| 24 | Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT | ||
| 25 | Strong = 1 << 5, ///< Exists in the image table; the dimensions can be trusted | ||
| 26 | Registered = 1 << 6, ///< True when the image is registered | ||
| 27 | Picked = 1 << 7, ///< Temporary flag to mark the image as picked | ||
| 28 | }; | ||
| 29 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) | ||
| 30 | |||
| 31 | struct ImageViewInfo; | ||
| 32 | |||
| 33 | struct AliasedImage { | ||
| 34 | std::vector<ImageCopy> copies; | ||
| 35 | ImageId id; | ||
| 36 | }; | ||
| 37 | |||
| 38 | struct ImageBase { | ||
| 39 | explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | ||
| 40 | |||
| 41 | [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept; | ||
| 42 | |||
| 43 | [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept; | ||
| 44 | |||
| 45 | void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); | ||
| 46 | |||
| 47 | [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { | ||
| 48 | const VAddr overlap_end = overlap_cpu_addr + overlap_size; | ||
| 49 | return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; | ||
| 50 | } | ||
| 51 | |||
| 52 | ImageInfo info; | ||
| 53 | |||
| 54 | u32 guest_size_bytes = 0; | ||
| 55 | u32 unswizzled_size_bytes = 0; | ||
| 56 | u32 converted_size_bytes = 0; | ||
| 57 | ImageFlagBits flags = ImageFlagBits::CpuModified; | ||
| 58 | |||
| 59 | GPUVAddr gpu_addr = 0; | ||
| 60 | VAddr cpu_addr = 0; | ||
| 61 | VAddr cpu_addr_end = 0; | ||
| 62 | |||
| 63 | u64 modification_tick = 0; | ||
| 64 | u64 frame_tick = 0; | ||
| 65 | |||
| 66 | std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; | ||
| 67 | |||
| 68 | std::vector<ImageViewInfo> image_view_infos; | ||
| 69 | std::vector<ImageViewId> image_view_ids; | ||
| 70 | |||
| 71 | std::vector<u32> slice_offsets; | ||
| 72 | std::vector<SubresourceBase> slice_subresources; | ||
| 73 | |||
| 74 | std::vector<AliasedImage> aliased_images; | ||
| 75 | }; | ||
| 76 | |||
| 77 | struct ImageAllocBase { | ||
| 78 | std::vector<ImageId> images; | ||
| 79 | }; | ||
| 80 | |||
| 81 | void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id); | ||
| 82 | |||
| 83 | } // namespace VideoCommon | ||
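[Editor's note] DECLARE_ENUM_FLAG_OPERATORS gives ImageFlagBits the usual bitwise operators so cache code can set and test state without casts at every call site. A self-contained sketch of the dirty-tracking pattern, with hand-written operators standing in for the macro from common_funcs.h:

    #include <cstdint>

    enum class ImageFlagBits : uint32_t {
        CpuModified = 1 << 2,
        GpuModified = 1 << 3,
    };
    // Stand-ins for what DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) generates
    constexpr ImageFlagBits operator|(ImageFlagBits a, ImageFlagBits b) {
        return static_cast<ImageFlagBits>(static_cast<uint32_t>(a) | static_cast<uint32_t>(b));
    }
    constexpr ImageFlagBits operator&(ImageFlagBits a, ImageFlagBits b) {
        return static_cast<ImageFlagBits>(static_cast<uint32_t>(a) & static_cast<uint32_t>(b));
    }
    constexpr ImageFlagBits& operator|=(ImageFlagBits& a, ImageFlagBits b) {
        return a = a | b;
    }

    bool NeedsUpload(ImageFlagBits flags) {
        // True when the CPU touched the image since the last host upload
        return (flags & ImageFlagBits::CpuModified) != ImageFlagBits{};
    }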
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp new file mode 100644 index 000000000..64fd7010a --- /dev/null +++ b/src/video_core/texture_cache/image_info.cpp | |||
| @@ -0,0 +1,189 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "video_core/surface.h" | ||
| 7 | #include "video_core/texture_cache/format_lookup_table.h" | ||
| 8 | #include "video_core/texture_cache/image_info.h" | ||
| 9 | #include "video_core/texture_cache/samples_helper.h" | ||
| 10 | #include "video_core/texture_cache/types.h" | ||
| 11 | #include "video_core/texture_cache/util.h" | ||
| 12 | #include "video_core/textures/texture.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | using Tegra::Texture::TextureType; | ||
| 17 | using Tegra::Texture::TICEntry; | ||
| 18 | using VideoCore::Surface::PixelFormat; | ||
| 19 | |||
| 20 | ImageInfo::ImageInfo(const TICEntry& config) noexcept { | ||
| 21 | format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, | ||
| 22 | config.a_type, config.srgb_conversion); | ||
| 23 | num_samples = NumSamples(config.msaa_mode); | ||
| 24 | resources.levels = config.max_mip_level + 1; | ||
| 25 | if (config.IsPitchLinear()) { | ||
| 26 | pitch = config.Pitch(); | ||
| 27 | } else if (config.IsBlockLinear()) { | ||
| 28 | block = Extent3D{ | ||
| 29 | .width = config.block_width, | ||
| 30 | .height = config.block_height, | ||
| 31 | .depth = config.block_depth, | ||
| 32 | }; | ||
| 33 | } | ||
| 34 | tile_width_spacing = config.tile_width_spacing; | ||
| 35 | if (config.texture_type != TextureType::Texture2D && | ||
| 36 | config.texture_type != TextureType::Texture2DNoMipmap) { | ||
| 37 | ASSERT(!config.IsPitchLinear()); | ||
| 38 | } | ||
| 39 | switch (config.texture_type) { | ||
| 40 | case TextureType::Texture1D: | ||
| 41 | ASSERT(config.BaseLayer() == 0); | ||
| 42 | type = ImageType::e1D; | ||
| 43 | size.width = config.Width(); | ||
| 44 | break; | ||
| 45 | case TextureType::Texture1DArray: | ||
| 46 | UNIMPLEMENTED_IF(config.BaseLayer() != 0); | ||
| 47 | type = ImageType::e1D; | ||
| 48 | size.width = config.Width(); | ||
| 49 | resources.layers = config.Depth(); | ||
| 50 | break; | ||
| 51 | case TextureType::Texture2D: | ||
| 52 | case TextureType::Texture2DNoMipmap: | ||
| 53 | ASSERT(config.Depth() == 1); | ||
| 54 | type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D; | ||
| 55 | size.width = config.Width(); | ||
| 56 | size.height = config.Height(); | ||
| 57 | resources.layers = config.BaseLayer() + 1; | ||
| 58 | break; | ||
| 59 | case TextureType::Texture2DArray: | ||
| 60 | type = ImageType::e2D; | ||
| 61 | size.width = config.Width(); | ||
| 62 | size.height = config.Height(); | ||
| 63 | resources.layers = config.BaseLayer() + config.Depth(); | ||
| 64 | break; | ||
| 65 | case TextureType::TextureCubemap: | ||
| 66 | ASSERT(config.Depth() == 1); | ||
| 67 | type = ImageType::e2D; | ||
| 68 | size.width = config.Width(); | ||
| 69 | size.height = config.Height(); | ||
| 70 | resources.layers = config.BaseLayer() + 6; | ||
| 71 | break; | ||
| 72 | case TextureType::TextureCubeArray: | ||
| 73 | UNIMPLEMENTED_IF(config.load_store_hint != 0); | ||
| 74 | type = ImageType::e2D; | ||
| 75 | size.width = config.Width(); | ||
| 76 | size.height = config.Height(); | ||
| 77 | resources.layers = config.BaseLayer() + config.Depth() * 6; | ||
| 78 | break; | ||
| 79 | case TextureType::Texture3D: | ||
| 80 | ASSERT(config.BaseLayer() == 0); | ||
| 81 | type = ImageType::e3D; | ||
| 82 | size.width = config.Width(); | ||
| 83 | size.height = config.Height(); | ||
| 84 | size.depth = config.Depth(); | ||
| 85 | break; | ||
| 86 | case TextureType::Texture1DBuffer: | ||
| 87 | type = ImageType::Buffer; | ||
| 88 | size.width = config.Width(); | ||
| 89 | break; | ||
| 90 | default: | ||
| 91 | UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); | ||
| 92 | break; | ||
| 93 | } | ||
| 94 | if (type != ImageType::Linear) { | ||
| 95 | // FIXME: Call this without passing *this | ||
| 96 | layer_stride = CalculateLayerStride(*this); | ||
| 97 | maybe_unaligned_layer_stride = CalculateLayerSize(*this); | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept { | ||
| 102 | const auto& rt = regs.rt[index]; | ||
| 103 | format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format); | ||
| 104 | if (rt.tile_mode.is_pitch_linear) { | ||
| 105 | ASSERT(rt.tile_mode.is_3d == 0); | ||
| 106 | type = ImageType::Linear; | ||
| 107 | pitch = rt.width; | ||
| 108 | size = Extent3D{ | ||
| 109 | .width = pitch / BytesPerBlock(format), | ||
| 110 | .height = rt.height, | ||
| 111 | .depth = 1, | ||
| 112 | }; | ||
| 113 | return; | ||
| 114 | } | ||
| 115 | size.width = rt.width; | ||
| 116 | size.height = rt.height; | ||
| 117 | layer_stride = rt.layer_stride * 4; | ||
| 118 | maybe_unaligned_layer_stride = layer_stride; | ||
| 119 | num_samples = NumSamples(regs.multisample_mode); | ||
| 120 | block = Extent3D{ | ||
| 121 | .width = rt.tile_mode.block_width, | ||
| 122 | .height = rt.tile_mode.block_height, | ||
| 123 | .depth = rt.tile_mode.block_depth, | ||
| 124 | }; | ||
| 125 | if (rt.tile_mode.is_3d) { | ||
| 126 | type = ImageType::e3D; | ||
| 127 | size.depth = rt.depth; | ||
| 128 | } else { | ||
| 129 | type = ImageType::e2D; | ||
| 130 | resources.layers = rt.depth; | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { | ||
| 135 | format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format); | ||
| 136 | size.width = regs.zeta_width; | ||
| 137 | size.height = regs.zeta_height; | ||
| 138 | resources.levels = 1; | ||
| 139 | layer_stride = regs.zeta.layer_stride * 4; | ||
| 140 | maybe_unaligned_layer_stride = layer_stride; | ||
| 141 | num_samples = NumSamples(regs.multisample_mode); | ||
| 142 | block = Extent3D{ | ||
| 143 | .width = regs.zeta.tile_mode.block_width, | ||
| 144 | .height = regs.zeta.tile_mode.block_height, | ||
| 145 | .depth = regs.zeta.tile_mode.block_depth, | ||
| 146 | }; | ||
| 147 | if (regs.zeta.tile_mode.is_pitch_linear) { | ||
| 148 | ASSERT(regs.zeta.tile_mode.is_3d == 0); | ||
| 149 | type = ImageType::Linear; | ||
| 150 | pitch = size.width * BytesPerBlock(format); | ||
| 151 | } else if (regs.zeta.tile_mode.is_3d) { | ||
| 152 | ASSERT(regs.zeta.tile_mode.is_pitch_linear == 0); | ||
| 153 | type = ImageType::e3D; | ||
| 154 | size.depth = regs.zeta_depth; | ||
| 155 | } else { | ||
| 156 | type = ImageType::e2D; | ||
| 157 | resources.layers = regs.zeta_depth; | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { | ||
| 162 | UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero"); | ||
| 163 | format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format); | ||
| 164 | if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) { | ||
| 165 | type = ImageType::Linear; | ||
| 166 | size = Extent3D{ | ||
| 167 | .width = config.pitch / VideoCore::Surface::BytesPerBlock(format), | ||
| 168 | .height = config.height, | ||
| 169 | .depth = 1, | ||
| 170 | }; | ||
| 171 | pitch = config.pitch; | ||
| 172 | } else { | ||
| 173 | type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D; | ||
| 174 | block = Extent3D{ | ||
| 175 | .width = config.block_width, | ||
| 176 | .height = config.block_height, | ||
| 177 | .depth = config.block_depth, | ||
| 178 | }; | ||
| 179 | // 3D blits with more than one slice are not implemented for now | ||
| 180 | // Render to individual slices | ||
| 181 | size = Extent3D{ | ||
| 182 | .width = config.width, | ||
| 183 | .height = config.height, | ||
| 184 | .depth = 1, | ||
| 185 | }; | ||
| 186 | } | ||
| 187 | } | ||
| 188 | |||
| 189 | } // namespace VideoCommon | ||
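[Editor's note] Every constructor above normalizes the guest's texture types onto a plain layer count: a cubemap becomes six 2D layers and a cube array becomes depth * 6. A condensed sketch of that flattening, with a simplified enum and no base-layer handling:

    #include <cstdint>

    enum class TextureType { Texture2D, Texture2DArray, TextureCubemap, TextureCubeArray };

    // Mirrors how ImageInfo turns cube faces into array layers.
    int32_t NumLayers(TextureType type, int32_t depth) {
        switch (type) {
        case TextureType::Texture2D:
            return 1;
        case TextureType::Texture2DArray:
            return depth;
        case TextureType::TextureCubemap:
            return 6; // One layer per cube face
        case TextureType::TextureCubeArray:
            return depth * 6; // 'depth' cubes, six faces each
        }
        return 1;
    }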
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h new file mode 100644 index 000000000..5049fc36e --- /dev/null +++ b/src/video_core/texture_cache/image_info.h | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/engines/fermi_2d.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 9 | #include "video_core/surface.h" | ||
| 10 | #include "video_core/texture_cache/types.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | using Tegra::Texture::TICEntry; | ||
| 15 | using VideoCore::Surface::PixelFormat; | ||
| 16 | |||
| 17 | struct ImageInfo { | ||
| 18 | explicit ImageInfo() = default; | ||
| 19 | explicit ImageInfo(const TICEntry& config) noexcept; | ||
| 20 | explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept; | ||
| 21 | explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept; | ||
| 22 | explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept; | ||
| 23 | |||
| 24 | PixelFormat format = PixelFormat::Invalid; | ||
| 25 | ImageType type = ImageType::e1D; | ||
| 26 | SubresourceExtent resources; | ||
| 27 | Extent3D size{1, 1, 1}; | ||
| 28 | union { | ||
| 29 | Extent3D block{0, 0, 0}; | ||
| 30 | u32 pitch; | ||
| 31 | }; | ||
| 32 | u32 layer_stride = 0; | ||
| 33 | u32 maybe_unaligned_layer_stride = 0; | ||
| 34 | u32 num_samples = 1; | ||
| 35 | u32 tile_width_spacing = 0; | ||
| 36 | }; | ||
| 37 | |||
| 38 | } // namespace VideoCommon | ||
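[Editor's note] The anonymous union in ImageInfo works because an image is either block-linear (described by block) or pitch-linear (described by pitch), never both; type acts as the discriminant that says which member is live. A standalone sketch of the same idiom:

    #include <cstdint>

    struct Extent3D {
        uint32_t width, height, depth;
    };

    enum class ImageType { e2D, Linear };

    struct TilingInfo {
        ImageType type = ImageType::e2D;
        union {
            Extent3D block{0, 0, 0}; // Live when type != Linear (block-linear tiling)
            uint32_t pitch;          // Live when type == Linear (row stride in bytes)
        };
    };

    uint32_t RowPitch(const TilingInfo& info) {
        // Only read the member selected by the discriminant.
        return info.type == ImageType::Linear ? info.pitch : 0;
    }

    uint32_t Example() {
        TilingInfo info;
        info.type = ImageType::Linear;
        info.pitch = 7680; // Writing pitch makes it the active member
        return RowPitch(info);
    }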
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp new file mode 100644 index 000000000..076a4bcfd --- /dev/null +++ b/src/video_core/texture_cache/image_view_base.cpp | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "core/settings.h" | ||
| 9 | #include "video_core/compatible_formats.h" | ||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/texture_cache/formatter.h" | ||
| 12 | #include "video_core/texture_cache/image_info.h" | ||
| 13 | #include "video_core/texture_cache/image_view_base.h" | ||
| 14 | #include "video_core/texture_cache/image_view_info.h" | ||
| 15 | #include "video_core/texture_cache/types.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, | ||
| 20 | ImageId image_id_) | ||
| 21 | : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range}, | ||
| 22 | size{ | ||
| 23 | .width = std::max(image_info.size.width >> range.base.level, 1u), | ||
| 24 | .height = std::max(image_info.size.height >> range.base.level, 1u), | ||
| 25 | .depth = std::max(image_info.size.depth >> range.base.level, 1u), | ||
| 26 | } { | ||
| 27 | ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format), | ||
| 28 | "Image view format {} is incompatible with image format {}", info.format, | ||
| 29 | image_info.format); | ||
| 30 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | ||
| 31 | if (image_info.type == ImageType::Linear && is_async) { | ||
| 32 | flags |= ImageViewFlagBits::PreemptiveDownload; | ||
| 33 | } | ||
| 34 | if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) { | ||
| 35 | flags |= ImageViewFlagBits::Slice; | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | ImageViewBase::ImageViewBase(const NullImageParams&) {} | ||
| 40 | |||
| 41 | } // namespace VideoCommon | ||
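[Editor's note] The size initializer above applies the standard mip-chain rule: every level halves each dimension, clamped at one texel so deep levels of narrow images stay valid. The rule in isolation (function name is mine):

    #include <algorithm>
    #include <cstdint>

    // Mip extent rule used by ImageViewBase: level N shifts each dimension
    // right N times, never below 1. E.g. 1920x1080 at level 5 -> 60x33.
    uint32_t MipDimension(uint32_t base, uint32_t level) {
        return std::max(base >> level, 1u);
    }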
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h new file mode 100644 index 000000000..73954167e --- /dev/null +++ b/src/video_core/texture_cache/image_view_base.h | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_funcs.h" | ||
| 8 | #include "video_core/surface.h" | ||
| 9 | #include "video_core/texture_cache/types.h" | ||
| 10 | |||
| 11 | namespace VideoCommon { | ||
| 12 | |||
| 13 | using VideoCore::Surface::PixelFormat; | ||
| 14 | |||
| 15 | struct ImageViewInfo; | ||
| 16 | struct ImageInfo; | ||
| 17 | |||
| 18 | struct NullImageParams {}; | ||
| 19 | |||
| 20 | enum class ImageViewFlagBits : u16 { | ||
| 21 | PreemptiveDownload = 1 << 0, | ||
| 22 | Strong = 1 << 1, | ||
| 23 | Slice = 1 << 2, | ||
| 24 | }; | ||
| 25 | DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) | ||
| 26 | |||
| 27 | struct ImageViewBase { | ||
| 28 | explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, | ||
| 29 | ImageId image_id); | ||
| 30 | explicit ImageViewBase(const NullImageParams&); | ||
| 31 | |||
| 32 | [[nodiscard]] bool IsBuffer() const noexcept { | ||
| 33 | return type == ImageViewType::Buffer; | ||
| 34 | } | ||
| 35 | |||
| 36 | ImageId image_id{}; | ||
| 37 | PixelFormat format{}; | ||
| 38 | ImageViewType type{}; | ||
| 39 | SubresourceRange range; | ||
| 40 | Extent3D size{0, 0, 0}; | ||
| 41 | ImageViewFlagBits flags{}; | ||
| 42 | |||
| 43 | u64 invalidation_tick = 0; | ||
| 44 | u64 modification_tick = 0; | ||
| 45 | }; | ||
| 46 | |||
| 47 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp new file mode 100644 index 000000000..faf5b151f --- /dev/null +++ b/src/video_core/texture_cache/image_view_info.cpp | |||
| @@ -0,0 +1,88 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <limits> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "video_core/texture_cache/image_view_info.h" | ||
| 9 | #include "video_core/texture_cache/texture_cache.h" | ||
| 10 | #include "video_core/texture_cache/types.h" | ||
| 11 | #include "video_core/textures/texture.h" | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | namespace { | ||
| 16 | |||
| 17 | constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max(); | ||
| 18 | |||
| 19 | [[nodiscard]] u8 CastSwizzle(SwizzleSource source) { | ||
| 20 | const u8 casted = static_cast<u8>(source); | ||
| 21 | ASSERT(static_cast<SwizzleSource>(casted) == source); | ||
| 22 | return casted; | ||
| 23 | } | ||
| 24 | |||
| 25 | } // Anonymous namespace | ||
| 26 | |||
| 27 | ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept | ||
| 28 | : format{PixelFormatFromTIC(config)}, x_source{CastSwizzle(config.x_source)}, | ||
| 29 | y_source{CastSwizzle(config.y_source)}, z_source{CastSwizzle(config.z_source)}, | ||
| 30 | w_source{CastSwizzle(config.w_source)} { | ||
| 31 | range.base = SubresourceBase{ | ||
| 32 | .level = static_cast<s32>(config.res_min_mip_level), | ||
| 33 | .layer = base_layer, | ||
| 34 | }; | ||
| 35 | range.extent.levels = config.res_max_mip_level - config.res_min_mip_level + 1; | ||
| 36 | |||
| 37 | switch (config.texture_type) { | ||
| 38 | case TextureType::Texture1D: | ||
| 39 | ASSERT(config.Height() == 1); | ||
| 40 | ASSERT(config.Depth() == 1); | ||
| 41 | type = ImageViewType::e1D; | ||
| 42 | break; | ||
| 43 | case TextureType::Texture2D: | ||
| 44 | case TextureType::Texture2DNoMipmap: | ||
| 45 | ASSERT(config.Depth() == 1); | ||
| 46 | type = config.normalized_coords ? ImageViewType::e2D : ImageViewType::Rect; | ||
| 47 | break; | ||
| 48 | case TextureType::Texture3D: | ||
| 49 | type = ImageViewType::e3D; | ||
| 50 | break; | ||
| 51 | case TextureType::TextureCubemap: | ||
| 52 | ASSERT(config.Depth() == 1); | ||
| 53 | type = ImageViewType::Cube; | ||
| 54 | range.extent.layers = 6; | ||
| 55 | break; | ||
| 56 | case TextureType::Texture1DArray: | ||
| 57 | type = ImageViewType::e1DArray; | ||
| 58 | range.extent.layers = config.Depth(); | ||
| 59 | break; | ||
| 60 | case TextureType::Texture2DArray: | ||
| 61 | type = ImageViewType::e2DArray; | ||
| 62 | range.extent.layers = config.Depth(); | ||
| 63 | break; | ||
| 64 | case TextureType::Texture1DBuffer: | ||
| 65 | type = ImageViewType::Buffer; | ||
| 66 | break; | ||
| 67 | case TextureType::TextureCubeArray: | ||
| 68 | type = ImageViewType::CubeArray; | ||
| 69 | range.extent.layers = config.Depth() * 6; | ||
| 70 | break; | ||
| 71 | default: | ||
| 72 | UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | ImageViewInfo::ImageViewInfo(ImageViewType type_, PixelFormat format_, | ||
| 78 | SubresourceRange range_) noexcept | ||
| 79 | : type{type_}, format{format_}, range{range_}, x_source{RENDER_TARGET_SWIZZLE}, | ||
| 80 | y_source{RENDER_TARGET_SWIZZLE}, z_source{RENDER_TARGET_SWIZZLE}, | ||
| 81 | w_source{RENDER_TARGET_SWIZZLE} {} | ||
| 82 | |||
| 83 | bool ImageViewInfo::IsRenderTarget() const noexcept { | ||
| 84 | return x_source == RENDER_TARGET_SWIZZLE && y_source == RENDER_TARGET_SWIZZLE && | ||
| 85 | z_source == RENDER_TARGET_SWIZZLE && w_source == RENDER_TARGET_SWIZZLE; | ||
| 86 | } | ||
| 87 | |||
| 88 | } // namespace VideoCommon | ||
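[Editor's note] Storing each swizzle source as a u8 keeps ImageViewInfo compact and trivially comparable, while the all-0xFF sentinel marks views built for render targets, where the guest supplies no swizzle; CastSwizzle's assert guards that the narrowing cast is lossless. A sketch of the sentinel check, assuming real SwizzleSource values never reach 0xFF:

    #include <cstdint>
    #include <limits>

    constexpr uint8_t RENDER_TARGET_SWIZZLE = std::numeric_limits<uint8_t>::max();

    struct Swizzles {
        uint8_t x = RENDER_TARGET_SWIZZLE;
        uint8_t y = RENDER_TARGET_SWIZZLE;
        uint8_t z = RENDER_TARGET_SWIZZLE;
        uint8_t w = RENDER_TARGET_SWIZZLE;
    };

    // A view is a render target iff every channel still holds the sentinel.
    bool IsRenderTarget(const Swizzles& s) {
        return s.x == RENDER_TARGET_SWIZZLE && s.y == RENDER_TARGET_SWIZZLE &&
               s.z == RENDER_TARGET_SWIZZLE && s.w == RENDER_TARGET_SWIZZLE;
    }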
diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h new file mode 100644 index 000000000..0c1f99117 --- /dev/null +++ b/src/video_core/texture_cache/image_view_info.h | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <type_traits> | ||
| 9 | |||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/texture_cache/types.h" | ||
| 12 | #include "video_core/textures/texture.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | using Tegra::Texture::SwizzleSource; | ||
| 17 | using Tegra::Texture::TICEntry; | ||
| 18 | using VideoCore::Surface::PixelFormat; | ||
| 19 | |||
| 20 | /// Properties used to determine an image view | ||
| 21 | struct ImageViewInfo { | ||
| 22 | explicit ImageViewInfo() noexcept = default; | ||
| 23 | explicit ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept; | ||
| 24 | explicit ImageViewInfo(ImageViewType type, PixelFormat format, | ||
| 25 | SubresourceRange range = {}) noexcept; | ||
| 26 | |||
| 27 | auto operator<=>(const ImageViewInfo&) const noexcept = default; | ||
| 28 | |||
| 29 | [[nodiscard]] bool IsRenderTarget() const noexcept; | ||
| 30 | |||
| 31 | [[nodiscard]] std::array<SwizzleSource, 4> Swizzle() const noexcept { | ||
| 32 | return std::array{ | ||
| 33 | static_cast<SwizzleSource>(x_source), | ||
| 34 | static_cast<SwizzleSource>(y_source), | ||
| 35 | static_cast<SwizzleSource>(z_source), | ||
| 36 | static_cast<SwizzleSource>(w_source), | ||
| 37 | }; | ||
| 38 | } | ||
| 39 | |||
| 40 | ImageViewType type{}; | ||
| 41 | PixelFormat format{}; | ||
| 42 | SubresourceRange range; | ||
| 43 | u8 x_source = static_cast<u8>(SwizzleSource::R); | ||
| 44 | u8 y_source = static_cast<u8>(SwizzleSource::G); | ||
| 45 | u8 z_source = static_cast<u8>(SwizzleSource::B); | ||
| 46 | u8 w_source = static_cast<u8>(SwizzleSource::A); | ||
| 47 | }; | ||
| 48 | static_assert(std::has_unique_object_representations_v<ImageViewInfo>); | ||
| 49 | |||
| 50 | } // namespace VideoCommon | ||
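[Editor's note] The static_assert is load-bearing: the cache compares and hashes ImageViewInfo by its object representation, and has_unique_object_representations guarantees there are no padding bytes that could make equal values differ byte-wise. A sketch of what the trait accepts and rejects (the Padded case assumes a mainstream ABI where u32 is 4-byte aligned):

    #include <cstdint>
    #include <type_traits>

    struct Packed { // Four u8 members, no padding: byte-wise hashing is safe
        uint8_t a, b, c, d;
    };
    struct Padded { // Three padding bytes after 'a' on typical ABIs
        uint8_t a;
        uint32_t b;
    };

    static_assert(std::has_unique_object_representations_v<Packed>);
    static_assert(!std::has_unique_object_representations_v<Padded>);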
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h new file mode 100644 index 000000000..9b9544b07 --- /dev/null +++ b/src/video_core/texture_cache/render_targets.h | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <span> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 11 | #include "common/bit_cast.h" | ||
| 12 | #include "video_core/texture_cache/types.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | /// Framebuffer properties used to look up a framebuffer | ||
| 17 | struct RenderTargets { | ||
| 18 | constexpr auto operator<=>(const RenderTargets&) const noexcept = default; | ||
| 19 | |||
| 20 | constexpr bool Contains(std::span<const ImageViewId> elements) const noexcept { | ||
| 21 | const auto contains = [elements](ImageViewId item) { | ||
| 22 | return std::ranges::find(elements, item) != elements.end(); | ||
| 23 | }; | ||
| 24 | return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id); | ||
| 25 | } | ||
| 26 | |||
| 27 | std::array<ImageViewId, NUM_RT> color_buffer_ids; | ||
| 28 | ImageViewId depth_buffer_id; | ||
| 29 | std::array<u8, NUM_RT> draw_buffers{}; | ||
| 30 | Extent2D size; | ||
| 31 | }; | ||
| 32 | |||
| 33 | } // namespace VideoCommon | ||
| 34 | |||
| 35 | namespace std { | ||
| 36 | |||
| 37 | template <> | ||
| 38 | struct hash<VideoCommon::RenderTargets> { | ||
| 39 | size_t operator()(const VideoCommon::RenderTargets& rt) const noexcept { | ||
| 40 | using VideoCommon::ImageViewId; | ||
| 41 | size_t value = std::hash<ImageViewId>{}(rt.depth_buffer_id); | ||
| 42 | for (const ImageViewId color_buffer_id : rt.color_buffer_ids) { | ||
| 43 | value ^= std::hash<ImageViewId>{}(color_buffer_id); | ||
| 44 | } | ||
| 45 | value ^= Common::BitCast<u64>(rt.draw_buffers); | ||
| 46 | value ^= Common::BitCast<u64>(rt.size); | ||
| 47 | return value; | ||
| 48 | } | ||
| 49 | }; | ||
| 50 | |||
| 51 | } // namespace std | ||
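[Editor's note] The hash above folds the two fixed-size payloads in by value: with NUM_RT == 8, draw_buffers is exactly eight bytes and Extent2D is two u32s, so each reinterprets losslessly into a u64. The same trick with std::bit_cast, which statically rejects any size mismatch:

    #include <array>
    #include <bit>
    #include <cstddef>
    #include <cstdint>

    struct Extent2D {
        uint32_t width, height;
    };

    // Fold an 8-byte trivially-copyable payload into a running hash value.
    template <typename T>
    void FoldHash(std::size_t& value, const T& payload) {
        static_assert(sizeof(T) == sizeof(uint64_t));
        value ^= std::bit_cast<uint64_t>(payload);
    }

    std::size_t HashExample() {
        std::size_t value = 0;
        FoldHash(value, std::array<uint8_t, 8>{0, 1, 2, 3, 4, 5, 6, 7});
        FoldHash(value, Extent2D{.width = 1280, .height = 720});
        return value;
    }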
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h new file mode 100644 index 000000000..04539a43c --- /dev/null +++ b/src/video_core/texture_cache/samples_helper.h | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <utility> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "video_core/textures/texture.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | [[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) { | ||
| 15 | switch (num_samples) { | ||
| 16 | case 1: | ||
| 17 | return {0, 0}; | ||
| 18 | case 2: | ||
| 19 | return {1, 0}; | ||
| 20 | case 4: | ||
| 21 | return {1, 1}; | ||
| 22 | case 8: | ||
| 23 | return {2, 1}; | ||
| 24 | case 16: | ||
| 25 | return {2, 2}; | ||
| 26 | } | ||
| 27 | UNREACHABLE_MSG("Invalid number of samples={}", num_samples); | ||
| 28 | return {1, 1}; | ||
| 29 | } | ||
| 30 | |||
| 31 | [[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) { | ||
| 32 | using Tegra::Texture::MsaaMode; | ||
| 33 | switch (msaa_mode) { | ||
| 34 | case MsaaMode::Msaa1x1: | ||
| 35 | return 1; | ||
| 36 | case MsaaMode::Msaa2x1: | ||
| 37 | case MsaaMode::Msaa2x1_D3D: | ||
| 38 | return 2; | ||
| 39 | case MsaaMode::Msaa2x2: | ||
| 40 | case MsaaMode::Msaa2x2_VC4: | ||
| 41 | case MsaaMode::Msaa2x2_VC12: | ||
| 42 | return 4; | ||
| 43 | case MsaaMode::Msaa4x2: | ||
| 44 | case MsaaMode::Msaa4x2_D3D: | ||
| 45 | case MsaaMode::Msaa4x2_VC8: | ||
| 46 | case MsaaMode::Msaa4x2_VC24: | ||
| 47 | return 8; | ||
| 48 | case MsaaMode::Msaa4x4: | ||
| 49 | return 16; | ||
| 50 | } | ||
| 51 | UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast<int>(msaa_mode)); | ||
| 52 | return 1; | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace VideoCommon | ||
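[Editor's note] SamplesLog2 reports the sample grid per axis, so 8x MSAA maps to {2, 1}: four samples across, two down. Callers scale surface dimensions by shifting, roughly like this sketch (function name is mine, assuming samples_helper.h is on the include path):

    #include <utility>
    #include "video_core/texture_cache/samples_helper.h"

    // For num_samples == 8, SamplesLog2 yields {2, 1}, so a 640x480 surface
    // stores 2560x960 samples.
    std::pair<int, int> SampledSize(int width, int height, int num_samples) {
        const auto [x_log2, y_log2] = VideoCommon::SamplesLog2(num_samples);
        return {width << x_log2, height << y_log2};
    }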
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h new file mode 100644 index 000000000..eae3be6ea --- /dev/null +++ b/src/video_core/texture_cache/slot_vector.h | |||
| @@ -0,0 +1,156 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <concepts> | ||
| 9 | #include <numeric> | ||
| 10 | #include <type_traits> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include "common/assert.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | struct SlotId { | ||
| 20 | static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max(); | ||
| 21 | |||
| 22 | constexpr auto operator<=>(const SlotId&) const noexcept = default; | ||
| 23 | |||
| 24 | constexpr explicit operator bool() const noexcept { | ||
| 25 | return index != INVALID_INDEX; | ||
| 26 | } | ||
| 27 | |||
| 28 | u32 index = INVALID_INDEX; | ||
| 29 | }; | ||
| 30 | |||
| 31 | template <class T> | ||
| 32 | requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T> | ||
| 33 | class SlotVector { | ||
| 34 | public: | ||
| 35 | ~SlotVector() noexcept { | ||
| 36 | size_t index = 0; | ||
| 37 | for (u64 bits : stored_bitset) { | ||
| 38 | for (size_t bit = 0; bits; ++bit, bits >>= 1) { | ||
| 39 | if ((bits & 1) != 0) { | ||
| 40 | values[index + bit].object.~T(); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | index += 64; | ||
| 44 | } | ||
| 45 | delete[] values; | ||
| 46 | } | ||
| 47 | |||
| 48 | [[nodiscard]] T& operator[](SlotId id) noexcept { | ||
| 49 | ValidateIndex(id); | ||
| 50 | return values[id.index].object; | ||
| 51 | } | ||
| 52 | |||
| 53 | [[nodiscard]] const T& operator[](SlotId id) const noexcept { | ||
| 54 | ValidateIndex(id); | ||
| 55 | return values[id.index].object; | ||
| 56 | } | ||
| 57 | |||
| 58 | template <typename... Args> | ||
| 59 | [[nodiscard]] SlotId insert(Args&&... args) noexcept { | ||
| 60 | const u32 index = FreeValueIndex(); | ||
| 61 | new (&values[index].object) T(std::forward<Args>(args)...); | ||
| 62 | SetStorageBit(index); | ||
| 63 | |||
| 64 | return SlotId{index}; | ||
| 65 | } | ||
| 66 | |||
| 67 | void erase(SlotId id) noexcept { | ||
| 68 | values[id.index].object.~T(); | ||
| 69 | free_list.push_back(id.index); | ||
| 70 | ResetStorageBit(id.index); | ||
| 71 | } | ||
| 72 | |||
| 73 | private: | ||
| 74 | struct NonTrivialDummy { | ||
| 75 | NonTrivialDummy() noexcept {} | ||
| 76 | }; | ||
| 77 | |||
| 78 | union Entry { | ||
| 79 | Entry() noexcept : dummy{} {} | ||
| 80 | ~Entry() noexcept {} | ||
| 81 | |||
| 82 | NonTrivialDummy dummy; | ||
| 83 | T object; | ||
| 84 | }; | ||
| 85 | |||
| 86 | void SetStorageBit(u32 index) noexcept { | ||
| 87 | stored_bitset[index / 64] |= u64(1) << (index % 64); | ||
| 88 | } | ||
| 89 | |||
| 90 | void ResetStorageBit(u32 index) noexcept { | ||
| 91 | stored_bitset[index / 64] &= ~(u64(1) << (index % 64)); | ||
| 92 | } | ||
| 93 | |||
| 94 | bool ReadStorageBit(u32 index) noexcept { | ||
| 95 | return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0; | ||
| 96 | } | ||
| 97 | |||
| 98 | void ValidateIndex(SlotId id) const noexcept { | ||
| 99 | DEBUG_ASSERT(id); | ||
| 100 | DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); | ||
| 101 | DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); | ||
| 102 | } | ||
| 103 | |||
| 104 | [[nodiscard]] u32 FreeValueIndex() noexcept { | ||
| 105 | if (free_list.empty()) { | ||
| 106 | Reserve(values_capacity ? (values_capacity << 1) : 1); | ||
| 107 | } | ||
| 108 | const u32 free_index = free_list.back(); | ||
| 109 | free_list.pop_back(); | ||
| 110 | return free_index; | ||
| 111 | } | ||
| 112 | |||
| 113 | void Reserve(size_t new_capacity) noexcept { | ||
| 114 | Entry* const new_values = new Entry[new_capacity]; | ||
| 115 | size_t index = 0; | ||
| 116 | for (u64 bits : stored_bitset) { | ||
| 117 | for (size_t bit = 0; bits; ++bit, bits >>= 1) { | ||
| 118 | const size_t i = index + bit; | ||
| 119 | if ((bits & 1) == 0) { | ||
| 120 | continue; | ||
| 121 | } | ||
| 122 | T& old_value = values[i].object; | ||
| 123 | new (&new_values[i].object) T(std::move(old_value)); | ||
| 124 | old_value.~T(); | ||
| 125 | } | ||
| 126 | index += 64; | ||
| 127 | } | ||
| 128 | |||
| 129 | stored_bitset.resize((new_capacity + 63) / 64); | ||
| 130 | |||
| 131 | const size_t old_free_size = free_list.size(); | ||
| 132 | free_list.resize(old_free_size + (new_capacity - values_capacity)); | ||
| 133 | std::iota(free_list.begin() + old_free_size, free_list.end(), | ||
| 134 | static_cast<u32>(values_capacity)); | ||
| 135 | |||
| 136 | delete[] values; | ||
| 137 | values = new_values; | ||
| 138 | values_capacity = new_capacity; | ||
| 139 | } | ||
| 140 | |||
| 141 | Entry* values = nullptr; | ||
| 142 | size_t values_capacity = 0; | ||
| 143 | size_t values_size = 0; | ||
| 144 | |||
| 145 | std::vector<u64> stored_bitset; | ||
| 146 | std::vector<u32> free_list; | ||
| 147 | }; | ||
| 148 | |||
| 149 | } // namespace VideoCommon | ||
| 150 | |||
| 151 | template <> | ||
| 152 | struct std::hash<VideoCommon::SlotId> { | ||
| 153 | size_t operator()(const VideoCommon::SlotId& id) const noexcept { | ||
| 154 | return std::hash<u32>{}(id.index); | ||
| 155 | } | ||
| 156 | }; | ||
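[Editor's note] SlotVector hands out stable SlotId handles: insert constructs the object in place and returns its index, erase recycles the index through the free list, and the stored bitset lets the destructor destroy only live slots. Typical usage, with an illustrative element type:

    #include "video_core/texture_cache/slot_vector.h"

    struct Image {
        int width = 0;
        int height = 0;
    };

    void Example() {
        VideoCommon::SlotVector<Image> images;
        const VideoCommon::SlotId id = images.insert(Image{.width = 64, .height = 64});
        images[id].width = 128; // Handles remain valid across later insertions
        images.erase(id);       // The slot index is recycled by a future insert
    }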
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp deleted file mode 100644 index efbcf6723..000000000 --- a/src/video_core/texture_cache/surface_base.cpp +++ /dev/null | |||
| @@ -1,299 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/algorithm.h" | ||
| 6 | #include "common/assert.h" | ||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "common/microprofile.h" | ||
| 9 | #include "video_core/memory_manager.h" | ||
| 10 | #include "video_core/texture_cache/surface_base.h" | ||
| 11 | #include "video_core/texture_cache/surface_params.h" | ||
| 12 | #include "video_core/textures/convert.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128)); | ||
| 17 | MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128)); | ||
| 18 | |||
| 19 | using Tegra::Texture::ConvertFromGuestToHost; | ||
| 20 | using VideoCore::MortonSwizzleMode; | ||
| 21 | using VideoCore::Surface::IsPixelFormatASTC; | ||
| 22 | using VideoCore::Surface::PixelFormat; | ||
| 23 | |||
| 24 | StagingCache::StagingCache() = default; | ||
| 25 | |||
| 26 | StagingCache::~StagingCache() = default; | ||
| 27 | |||
| 28 | SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_, | ||
| 29 | bool is_astc_supported_) | ||
| 30 | : params{params_}, gpu_addr{gpu_addr_}, mipmap_sizes(params_.num_levels), | ||
| 31 | mipmap_offsets(params.num_levels) { | ||
| 32 | is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported_; | ||
| 33 | host_memory_size = params.GetHostSizeInBytes(is_converted); | ||
| 34 | |||
| 35 | std::size_t offset = 0; | ||
| 36 | for (u32 level = 0; level < params.num_levels; ++level) { | ||
| 37 | const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; | ||
| 38 | mipmap_sizes[level] = mipmap_size; | ||
| 39 | mipmap_offsets[level] = offset; | ||
| 40 | offset += mipmap_size; | ||
| 41 | } | ||
| 42 | layer_size = offset; | ||
| 43 | if (params.is_layered) { | ||
| 44 | if (params.is_tiled) { | ||
| 45 | layer_size = | ||
| 46 | SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth); | ||
| 47 | } | ||
| 48 | guest_memory_size = layer_size * params.depth; | ||
| 49 | } else { | ||
| 50 | guest_memory_size = layer_size; | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const { | ||
| 55 | const u32 src_bpp{params.GetBytesPerPixel()}; | ||
| 56 | const u32 dst_bpp{rhs.GetBytesPerPixel()}; | ||
| 57 | const bool ib1 = params.IsBuffer(); | ||
| 58 | const bool ib2 = rhs.IsBuffer(); | ||
| 59 | if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { | ||
| 60 | const bool cb1 = params.IsCompressed(); | ||
| 61 | const bool cb2 = rhs.IsCompressed(); | ||
| 62 | if (cb1 == cb2) { | ||
| 63 | return MatchTopologyResult::FullMatch; | ||
| 64 | } | ||
| 65 | return MatchTopologyResult::CompressUnmatch; | ||
| 66 | } | ||
| 67 | return MatchTopologyResult::None; | ||
| 68 | } | ||
| 69 | |||
| 70 | MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const { | ||
| 71 | // Buffer surface Check | ||
| 72 | if (params.IsBuffer()) { | ||
| 73 | const std::size_t wd1 = params.width * params.GetBytesPerPixel(); | ||
| 74 | const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); | ||
| 75 | if (wd1 == wd2) { | ||
| 76 | return MatchStructureResult::FullMatch; | ||
| 77 | } | ||
| 78 | return MatchStructureResult::None; | ||
| 79 | } | ||
| 80 | |||
| 81 | // Linear Surface check | ||
| 82 | if (!params.is_tiled) { | ||
| 83 | if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) { | ||
| 84 | if (params.width == rhs.width) { | ||
| 85 | return MatchStructureResult::FullMatch; | ||
| 86 | } else { | ||
| 87 | return MatchStructureResult::SemiMatch; | ||
| 88 | } | ||
| 89 | } | ||
| 90 | return MatchStructureResult::None; | ||
| 91 | } | ||
| 92 | |||
| 93 | // Tiled Surface check | ||
| 94 | if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, | ||
| 95 | params.tile_width_spacing, params.num_levels) == | ||
| 96 | std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, | ||
| 97 | rhs.tile_width_spacing, rhs.num_levels)) { | ||
| 98 | if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { | ||
| 99 | return MatchStructureResult::FullMatch; | ||
| 100 | } | ||
| 101 | const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format, | ||
| 102 | rhs.pixel_format); | ||
| 103 | const u32 hs = | ||
| 104 | SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); | ||
| 105 | const u32 w1 = params.GetBlockAlignedWidth(); | ||
| 106 | if (std::tie(w1, params.height) == std::tie(ws, hs)) { | ||
| 107 | return MatchStructureResult::SemiMatch; | ||
| 108 | } | ||
| 109 | } | ||
| 110 | return MatchStructureResult::None; | ||
| 111 | } | ||
| 112 | |||
| 113 | std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap( | ||
| 114 | const GPUVAddr candidate_gpu_addr) const { | ||
| 115 | if (gpu_addr == candidate_gpu_addr) { | ||
| 116 | return {{0, 0}}; | ||
| 117 | } | ||
| 118 | |||
| 119 | if (candidate_gpu_addr < gpu_addr) { | ||
| 120 | return std::nullopt; | ||
| 121 | } | ||
| 122 | |||
| 123 | const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; | ||
| 124 | const auto layer{static_cast<u32>(relative_address / layer_size)}; | ||
| 125 | if (layer >= params.depth) { | ||
| 126 | return std::nullopt; | ||
| 127 | } | ||
| 128 | |||
| 129 | const GPUVAddr mipmap_address = relative_address - layer_size * layer; | ||
| 130 | const auto mipmap_it = | ||
| 131 | Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); | ||
| 132 | if (mipmap_it == mipmap_offsets.end()) { | ||
| 133 | return std::nullopt; | ||
| 134 | } | ||
| 135 | |||
| 136 | const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))}; | ||
| 137 | return std::make_pair(layer, level); | ||
| 138 | } | ||
| 139 | |||
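The address decoding above is easier to follow with concrete numbers; every value in this walkthrough is assumed purely for illustration:

    // Worked example for GetLayerMipmap (illustrative values):
    //   gpu_addr       = 0x2000'0000
    //   layer_size     = 0x1'0000 (64 KiB per layer)
    //   mipmap_offsets = {0x0000, 0xC000, 0xF000}
    // Query: candidate_gpu_addr = 0x2001'C000
    //   relative_address = 0x1'C000
    //   layer            = 0x1'C000 / 0x1'0000 = 1
    //   mipmap_address   = 0x1'C000 - 1 * 0x1'0000 = 0xC000
    //   BinaryFind hits mipmap_offsets[1], so the result is {layer 1, level 1}.
    // Addresses that do not land exactly on a mipmap offset yield std::nullopt.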
| 140 | std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const { | ||
| 141 | const u32 layers{params.depth}; | ||
| 142 | const u32 mipmaps{params.num_levels}; | ||
| 143 | std::vector<CopyParams> result; | ||
| 144 | result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps)); | ||
| 145 | |||
| 146 | for (u32 layer = 0; layer < layers; layer++) { | ||
| 147 | for (u32 level = 0; level < mipmaps; level++) { | ||
| 148 | const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); | ||
| 149 | const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); | ||
| 150 | result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1); | ||
| 151 | } | ||
| 152 | } | ||
| 153 | return result; | ||
| 154 | } | ||
| 155 | |||
| 156 | std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const { | ||
| 157 | const u32 mipmaps{params.num_levels}; | ||
| 158 | std::vector<CopyParams> result; | ||
| 159 | result.reserve(mipmaps); | ||
| 160 | |||
| 161 | for (u32 level = 0; level < mipmaps; level++) { | ||
| 162 | const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); | ||
| 163 | const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); | ||
| 164 | const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; | ||
| 165 | result.emplace_back(width, height, depth, level); | ||
| 166 | } | ||
| 167 | return result; | ||
| 168 | } | ||
| 169 | |||
| 170 | void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, | ||
| 171 | const SurfaceParams& surface_params, u8* buffer, u32 level) { | ||
| 172 | const u32 width{surface_params.GetMipWidth(level)}; | ||
| 173 | const u32 height{surface_params.GetMipHeight(level)}; | ||
| 174 | const u32 block_height{surface_params.GetMipBlockHeight(level)}; | ||
| 175 | const u32 block_depth{surface_params.GetMipBlockDepth(level)}; | ||
| 176 | |||
| 177 | std::size_t guest_offset{mipmap_offsets[level]}; | ||
| 178 | if (surface_params.is_layered) { | ||
| 179 | std::size_t host_offset = 0; | ||
| 180 | const std::size_t guest_stride = layer_size; | ||
| 181 | const std::size_t host_stride = surface_params.GetHostLayerSize(level); | ||
| 182 | for (u32 layer = 0; layer < surface_params.depth; ++layer) { | ||
| 183 | MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, | ||
| 184 | block_depth, 1, surface_params.tile_width_spacing, buffer + host_offset, | ||
| 185 | memory + guest_offset); | ||
| 186 | guest_offset += guest_stride; | ||
| 187 | host_offset += host_stride; | ||
| 188 | } | ||
| 189 | } else { | ||
| 190 | MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, block_depth, | ||
| 191 | surface_params.GetMipDepth(level), surface_params.tile_width_spacing, buffer, | ||
| 192 | memory + guest_offset); | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
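For layered surfaces the loop above advances two independent strides, because the guest layout stores all mipmaps of a layer together while the host layout stores one level contiguously across layers:

    // Stride bookkeeping in the layered branch (summary of the code above):
    //   guest_offset starts at mipmap_offsets[level] and advances by layer_size,
    //   host_offset  starts at 0 and advances by GetHostLayerSize(level),
    // so layer N maps guest [mipmap_offsets[level] + N * layer_size, ...)
    // onto host [N * GetHostLayerSize(level), ...).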
| 196 | void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, | ||
| 197 | StagingCache& staging_cache) { | ||
| 198 | MICROPROFILE_SCOPE(GPU_Load_Texture); | ||
| 199 | auto& staging_buffer = staging_cache.GetBuffer(0); | ||
| 200 | u8* host_ptr; | ||
| 201 | // Use an extra temporary buffer | ||
| 202 | auto& tmp_buffer = staging_cache.GetBuffer(1); | ||
| 203 | tmp_buffer.resize(guest_memory_size); | ||
| 204 | host_ptr = tmp_buffer.data(); | ||
| 205 | memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||
| 206 | |||
| 207 | if (params.is_tiled) { | ||
| 208 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", | ||
| 209 | params.block_width, static_cast<u32>(params.target)); | ||
| 210 | for (u32 level = 0; level < params.num_levels; ++level) { | ||
| 211 | const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; | ||
| 212 | SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, | ||
| 213 | staging_buffer.data() + host_offset, level); | ||
| 214 | } | ||
| 215 | } else { | ||
| 216 | ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented"); | ||
| 217 | const u32 bpp{params.GetBytesPerPixel()}; | ||
| 218 | const u32 block_width{params.GetDefaultBlockWidth()}; | ||
| 219 | const u32 block_height{params.GetDefaultBlockHeight()}; | ||
| 220 | const u32 width{(params.width + block_width - 1) / block_width}; | ||
| 221 | const u32 height{(params.height + block_height - 1) / block_height}; | ||
| 222 | const u32 copy_size{width * bpp}; | ||
| 223 | if (params.pitch == copy_size) { | ||
| 224 | std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false)); | ||
| 225 | } else { | ||
| 226 | const u8* start{host_ptr}; | ||
| 227 | u8* write_to{staging_buffer.data()}; | ||
| 228 | for (u32 h = height; h > 0; --h) { | ||
| 229 | std::memcpy(write_to, start, copy_size); | ||
| 230 | start += params.pitch; | ||
| 231 | write_to += copy_size; | ||
| 232 | } | ||
| 233 | } | ||
| 234 | } | ||
| 235 | |||
| 236 | if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) { | ||
| 237 | return; | ||
| 238 | } | ||
| 239 | |||
| 240 | for (u32 level = params.num_levels; level--;) { | ||
| 241 | const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)}; | ||
| 242 | const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)}; | ||
| 243 | u8* const in_buffer = staging_buffer.data() + in_host_offset; | ||
| 244 | u8* const out_buffer = staging_buffer.data() + out_host_offset; | ||
| 245 | ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, | ||
| 246 | params.GetMipWidth(level), params.GetMipHeight(level), | ||
| 247 | params.GetMipDepth(level), true, true); | ||
| 248 | } | ||
| 249 | } | ||
| 250 | |||
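The linear branch of LoadBuffer de-pitches the guest image: when the guest stride is wider than the packed row, rows are copied one at a time. The same pattern in isolation, as a hedged standalone sketch (the function name and parameters are assumptions for illustration):

    #include <cstring>

    // Copies 'height' rows of 'row_bytes' each from a pitched source into a
    // tightly packed destination; degenerates to one memcpy when pitch matches.
    void DepitchRows(const unsigned char* src, unsigned char* dst, unsigned height,
                     unsigned pitch, unsigned row_bytes) {
        if (pitch == row_bytes) {
            std::memcpy(dst, src, static_cast<std::size_t>(height) * row_bytes);
            return;
        }
        for (unsigned row = 0; row < height; ++row) {
            std::memcpy(dst + static_cast<std::size_t>(row) * row_bytes,
                        src + static_cast<std::size_t>(row) * pitch, row_bytes);
        }
    }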
| 251 | void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, | ||
| 252 | StagingCache& staging_cache) { | ||
| 253 | MICROPROFILE_SCOPE(GPU_Flush_Texture); | ||
| 254 | auto& staging_buffer = staging_cache.GetBuffer(0); | ||
| 255 | u8* host_ptr; | ||
| 256 | |||
| 257 | // Use an extra temporary buffer | ||
| 258 | auto& tmp_buffer = staging_cache.GetBuffer(1); | ||
| 259 | tmp_buffer.resize(guest_memory_size); | ||
| 260 | host_ptr = tmp_buffer.data(); | ||
| 261 | |||
| 262 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 263 | // Special case for 3D texture segments | ||
| 264 | memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||
| 265 | } | ||
| 266 | |||
| 267 | if (params.is_tiled) { | ||
| 268 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); | ||
| 269 | for (u32 level = 0; level < params.num_levels; ++level) { | ||
| 270 | const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; | ||
| 271 | SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, | ||
| 272 | staging_buffer.data() + host_offset, level); | ||
| 273 | } | ||
| 274 | } else if (params.IsBuffer()) { | ||
| 275 | // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest | ||
| 276 | // memory. | ||
| 277 | std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); | ||
| 278 | } else { | ||
| 279 | ASSERT(params.target == SurfaceTarget::Texture2D); | ||
| 280 | ASSERT(params.num_levels == 1); | ||
| 281 | |||
| 282 | const u32 bpp{params.GetBytesPerPixel()}; | ||
| 283 | const u32 copy_size{params.width * bpp}; | ||
| 284 | if (params.pitch == copy_size) { | ||
| 285 | std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); | ||
| 286 | } else { | ||
| 287 | u8* start{host_ptr}; | ||
| 288 | const u8* read_to{staging_buffer.data()}; | ||
| 289 | for (u32 h = params.height; h > 0; --h) { | ||
| 290 | std::memcpy(start, read_to, copy_size); | ||
| 291 | start += params.pitch; | ||
| 292 | read_to += copy_size; | ||
| 293 | } | ||
| 294 | } | ||
| 295 | } | ||
| 296 | memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||
| 297 | } | ||
| 298 | |||
| 299 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h deleted file mode 100644 index b57135fe4..000000000 --- a/src/video_core/texture_cache/surface_base.h +++ /dev/null | |||
| @@ -1,333 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <tuple> | ||
| 9 | #include <unordered_map> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/gpu.h" | ||
| 14 | #include "video_core/morton.h" | ||
| 15 | #include "video_core/texture_cache/copy_params.h" | ||
| 16 | #include "video_core/texture_cache/surface_params.h" | ||
| 17 | #include "video_core/texture_cache/surface_view.h" | ||
| 18 | |||
| 19 | namespace Tegra { | ||
| 20 | class MemoryManager; | ||
| 21 | } | ||
| 22 | |||
| 23 | namespace VideoCommon { | ||
| 24 | |||
| 25 | using VideoCore::MortonSwizzleMode; | ||
| 26 | using VideoCore::Surface::SurfaceTarget; | ||
| 27 | |||
| 28 | enum class MatchStructureResult : u32 { | ||
| 29 | FullMatch = 0, | ||
| 30 | SemiMatch = 1, | ||
| 31 | None = 2, | ||
| 32 | }; | ||
| 33 | |||
| 34 | enum class MatchTopologyResult : u32 { | ||
| 35 | FullMatch = 0, | ||
| 36 | CompressUnmatch = 1, | ||
| 37 | None = 2, | ||
| 38 | }; | ||
| 39 | |||
| 40 | class StagingCache { | ||
| 41 | public: | ||
| 42 | explicit StagingCache(); | ||
| 43 | ~StagingCache(); | ||
| 44 | |||
| 45 | std::vector<u8>& GetBuffer(std::size_t index) { | ||
| 46 | return staging_buffer[index]; | ||
| 47 | } | ||
| 48 | |||
| 49 | const std::vector<u8>& GetBuffer(std::size_t index) const { | ||
| 50 | return staging_buffer[index]; | ||
| 51 | } | ||
| 52 | |||
| 53 | void SetSize(std::size_t size) { | ||
| 54 | staging_buffer.resize(size); | ||
| 55 | } | ||
| 56 | |||
| 57 | private: | ||
| 58 | std::vector<std::vector<u8>> staging_buffer; | ||
| 59 | }; | ||
| 60 | |||
| 61 | class SurfaceBaseImpl { | ||
| 62 | public: | ||
| 63 | void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); | ||
| 64 | |||
| 65 | void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); | ||
| 66 | |||
| 67 | GPUVAddr GetGpuAddr() const { | ||
| 68 | return gpu_addr; | ||
| 69 | } | ||
| 70 | |||
| 71 | bool Overlaps(const VAddr start, const VAddr end) const { | ||
| 72 | return (cpu_addr < end) && (cpu_addr_end > start); | ||
| 73 | } | ||
| 74 | |||
| 75 | bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const { | ||
| 76 | const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; | ||
| 77 | return gpu_addr <= other_start && other_end <= gpu_addr_end; | ||
| 78 | } | ||
| 79 | |||
| 80 | // Use only when recycling a surface | ||
| 81 | void SetGpuAddr(const GPUVAddr new_addr) { | ||
| 82 | gpu_addr = new_addr; | ||
| 83 | } | ||
| 84 | |||
| 85 | VAddr GetCpuAddr() const { | ||
| 86 | return cpu_addr; | ||
| 87 | } | ||
| 88 | |||
| 89 | VAddr GetCpuAddrEnd() const { | ||
| 90 | return cpu_addr_end; | ||
| 91 | } | ||
| 92 | |||
| 93 | void SetCpuAddr(const VAddr new_addr) { | ||
| 94 | cpu_addr = new_addr; | ||
| 95 | cpu_addr_end = new_addr + guest_memory_size; | ||
| 96 | } | ||
| 97 | |||
| 98 | const SurfaceParams& GetSurfaceParams() const { | ||
| 99 | return params; | ||
| 100 | } | ||
| 101 | |||
| 102 | std::size_t GetSizeInBytes() const { | ||
| 103 | return guest_memory_size; | ||
| 104 | } | ||
| 105 | |||
| 106 | std::size_t GetHostSizeInBytes() const { | ||
| 107 | return host_memory_size; | ||
| 108 | } | ||
| 109 | |||
| 110 | std::size_t GetMipmapSize(const u32 level) const { | ||
| 111 | return mipmap_sizes[level]; | ||
| 112 | } | ||
| 113 | |||
| 114 | bool IsLinear() const { | ||
| 115 | return !params.is_tiled; | ||
| 116 | } | ||
| 117 | |||
| 118 | bool IsConverted() const { | ||
| 119 | return is_converted; | ||
| 120 | } | ||
| 121 | |||
| 122 | bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { | ||
| 123 | return params.pixel_format == pixel_format; | ||
| 124 | } | ||
| 125 | |||
| 126 | VideoCore::Surface::PixelFormat GetFormat() const { | ||
| 127 | return params.pixel_format; | ||
| 128 | } | ||
| 129 | |||
| 130 | bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { | ||
| 131 | return params.target == target; | ||
| 132 | } | ||
| 133 | |||
| 134 | MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const; | ||
| 135 | |||
| 136 | MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const; | ||
| 137 | |||
| 138 | bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { | ||
| 139 | return std::tie(gpu_addr, params.target, params.num_levels) == | ||
| 140 | std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && | ||
| 141 | params.target == SurfaceTarget::Texture2D && params.num_levels == 1; | ||
| 142 | } | ||
| 143 | |||
| 144 | std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const; | ||
| 145 | |||
| 146 | std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const { | ||
| 147 | return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params); | ||
| 148 | } | ||
| 149 | |||
| 150 | protected: | ||
| 151 | explicit SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_, | ||
| 152 | bool is_astc_supported_); | ||
| 153 | ~SurfaceBaseImpl() = default; | ||
| 154 | |||
| 155 | virtual void DecorateSurfaceName() = 0; | ||
| 156 | |||
| 157 | const SurfaceParams params; | ||
| 158 | std::size_t layer_size; | ||
| 159 | std::size_t guest_memory_size; | ||
| 160 | std::size_t host_memory_size; | ||
| 161 | GPUVAddr gpu_addr{}; | ||
| 162 | VAddr cpu_addr{}; | ||
| 163 | VAddr cpu_addr_end{}; | ||
| 164 | bool is_converted{}; | ||
| 165 | |||
| 166 | std::vector<std::size_t> mipmap_sizes; | ||
| 167 | std::vector<std::size_t> mipmap_offsets; | ||
| 168 | |||
| 169 | private: | ||
| 170 | void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& surface_params, | ||
| 171 | u8* buffer, u32 level); | ||
| 172 | |||
| 173 | std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const; | ||
| 174 | |||
| 175 | std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const; | ||
| 176 | }; | ||
| 177 | |||
| 178 | template <typename TView> | ||
| 179 | class SurfaceBase : public SurfaceBaseImpl { | ||
| 180 | public: | ||
| 181 | virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0; | ||
| 182 | |||
| 183 | virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0; | ||
| 184 | |||
| 185 | void MarkAsModified(bool is_modified_, u64 tick) { | ||
| 186 | is_modified = is_modified_ || is_target; | ||
| 187 | modification_tick = tick; | ||
| 188 | } | ||
| 189 | |||
| 190 | void MarkAsRenderTarget(bool is_target_, u32 index_) { | ||
| 191 | is_target = is_target_; | ||
| 192 | index = index_; | ||
| 193 | } | ||
| 194 | |||
| 195 | void SetMemoryMarked(bool is_memory_marked_) { | ||
| 196 | is_memory_marked = is_memory_marked_; | ||
| 197 | } | ||
| 198 | |||
| 199 | bool IsMemoryMarked() const { | ||
| 200 | return is_memory_marked; | ||
| 201 | } | ||
| 202 | |||
| 203 | void SetSyncPending(bool is_sync_pending_) { | ||
| 204 | is_sync_pending = is_sync_pending_; | ||
| 205 | } | ||
| 206 | |||
| 207 | bool IsSyncPending() const { | ||
| 208 | return is_sync_pending; | ||
| 209 | } | ||
| 210 | |||
| 211 | void MarkAsPicked(bool is_picked_) { | ||
| 212 | is_picked = is_picked_; | ||
| 213 | } | ||
| 214 | |||
| 215 | bool IsModified() const { | ||
| 216 | return is_modified; | ||
| 217 | } | ||
| 218 | |||
| 219 | bool IsProtected() const { | ||
| 220 | // Only 3D slices are to be protected | ||
| 221 | return is_target && params.target == SurfaceTarget::Texture3D; | ||
| 222 | } | ||
| 223 | |||
| 224 | bool IsRenderTarget() const { | ||
| 225 | return is_target; | ||
| 226 | } | ||
| 227 | |||
| 228 | u32 GetRenderTarget() const { | ||
| 229 | return index; | ||
| 230 | } | ||
| 231 | |||
| 232 | bool IsRegistered() const { | ||
| 233 | return is_registered; | ||
| 234 | } | ||
| 235 | |||
| 236 | bool IsPicked() const { | ||
| 237 | return is_picked; | ||
| 238 | } | ||
| 239 | |||
| 240 | void MarkAsRegistered(bool is_reg) { | ||
| 241 | is_registered = is_reg; | ||
| 242 | } | ||
| 243 | |||
| 244 | u64 GetModificationTick() const { | ||
| 245 | return modification_tick; | ||
| 246 | } | ||
| 247 | |||
| 248 | TView EmplaceOverview(const SurfaceParams& overview_params) { | ||
| 249 | const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth}; | ||
| 250 | return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); | ||
| 251 | } | ||
| 252 | |||
| 253 | TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) { | ||
| 254 | return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth, | ||
| 255 | base_level, num_levels)); | ||
| 256 | } | ||
| 257 | |||
| 258 | std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params, | ||
| 259 | const GPUVAddr view_addr, | ||
| 260 | const std::size_t candidate_size, const u32 mipmap, | ||
| 261 | const u32 layer) { | ||
| 262 | const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)}; | ||
| 263 | if (!layer_mipmap) { | ||
| 264 | return {}; | ||
| 265 | } | ||
| 266 | const auto [end_layer, end_mipmap] = *layer_mipmap; | ||
| 267 | if (layer != end_layer) { | ||
| 268 | if (mipmap == 0 && end_mipmap == 0) { | ||
| 269 | return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1)); | ||
| 270 | } | ||
| 271 | return {}; | ||
| 272 | } else { | ||
| 273 | return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap)); | ||
| 274 | } | ||
| 275 | } | ||
| 276 | |||
| 277 | std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, | ||
| 278 | const std::size_t candidate_size) { | ||
| 279 | if (params.target == SurfaceTarget::Texture3D || | ||
| 280 | view_params.target == SurfaceTarget::Texture3D || | ||
| 281 | (params.num_levels == 1 && !params.is_layered)) { | ||
| 282 | return {}; | ||
| 283 | } | ||
| 284 | const auto layer_mipmap{GetLayerMipmap(view_addr)}; | ||
| 285 | if (!layer_mipmap) { | ||
| 286 | return {}; | ||
| 287 | } | ||
| 288 | const auto [layer, mipmap] = *layer_mipmap; | ||
| 289 | if (GetMipmapSize(mipmap) != candidate_size) { | ||
| 290 | return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer); | ||
| 291 | } | ||
| 292 | return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1)); | ||
| 293 | } | ||
| 294 | |||
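EmplaceView and EmplaceIrregularView together decide what kind of view a candidate address range can be served as; a compressed walkthrough of that decision, restating the code above:

    // Decision flow (summary of the code above, names unchanged):
    //   1. 3D targets and single-level, non-layered surfaces never yield views.
    //   2. GetLayerMipmap(view_addr) locates the (layer, mipmap) the address hits.
    //   3. If that mipmap's size equals candidate_size, a one-layer, one-level
    //      view is returned directly.
    //   4. Otherwise EmplaceIrregularView either spans several layers (only at
    //      mipmap 0) or several levels within a single layer, or fails.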
| 295 | TView GetMainView() const { | ||
| 296 | return main_view; | ||
| 297 | } | ||
| 298 | |||
| 299 | protected: | ||
| 300 | explicit SurfaceBase(const GPUVAddr gpu_addr_, const SurfaceParams& params_, | ||
| 301 | bool is_astc_supported_) | ||
| 302 | : SurfaceBaseImpl{gpu_addr_, params_, is_astc_supported_} {} | ||
| 303 | |||
| 304 | ~SurfaceBase() = default; | ||
| 305 | |||
| 306 | virtual TView CreateView(const ViewParams& view_key) = 0; | ||
| 307 | |||
| 308 | TView main_view; | ||
| 309 | std::unordered_map<ViewParams, TView> views; | ||
| 310 | |||
| 311 | private: | ||
| 312 | TView GetView(const ViewParams& key) { | ||
| 313 | const auto [entry, is_cache_miss] = views.try_emplace(key); | ||
| 314 | auto& view{entry->second}; | ||
| 315 | if (is_cache_miss) { | ||
| 316 | view = CreateView(key); | ||
| 317 | } | ||
| 318 | return view; | ||
| 319 | } | ||
| 320 | |||
| 321 | static constexpr u32 NO_RT = 0xFFFFFFFF; | ||
| 322 | |||
| 323 | bool is_modified{}; | ||
| 324 | bool is_target{}; | ||
| 325 | bool is_registered{}; | ||
| 326 | bool is_picked{}; | ||
| 327 | bool is_memory_marked{}; | ||
| 328 | bool is_sync_pending{}; | ||
| 329 | u32 index{NO_RT}; | ||
| 330 | u64 modification_tick{}; | ||
| 331 | }; | ||
| 332 | |||
| 333 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp deleted file mode 100644 index 96f93246d..000000000 --- a/src/video_core/texture_cache/surface_params.cpp +++ /dev/null | |||
| @@ -1,445 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <string> | ||
| 7 | #include <tuple> | ||
| 8 | |||
| 9 | #include "common/alignment.h" | ||
| 10 | #include "common/bit_util.h" | ||
| 11 | #include "core/core.h" | ||
| 12 | #include "video_core/engines/shader_bytecode.h" | ||
| 13 | #include "video_core/surface.h" | ||
| 14 | #include "video_core/texture_cache/format_lookup_table.h" | ||
| 15 | #include "video_core/texture_cache/surface_params.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | using VideoCore::Surface::PixelFormat; | ||
| 20 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 21 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 22 | using VideoCore::Surface::SurfaceTarget; | ||
| 23 | using VideoCore::Surface::SurfaceTargetFromTextureType; | ||
| 24 | using VideoCore::Surface::SurfaceType; | ||
| 25 | |||
| 26 | namespace { | ||
| 27 | |||
| 28 | SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { | ||
| 29 | switch (type) { | ||
| 30 | case Tegra::Shader::TextureType::Texture1D: | ||
| 31 | return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D; | ||
| 32 | case Tegra::Shader::TextureType::Texture2D: | ||
| 33 | return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; | ||
| 34 | case Tegra::Shader::TextureType::Texture3D: | ||
| 35 | ASSERT(!is_array); | ||
| 36 | return SurfaceTarget::Texture3D; | ||
| 37 | case Tegra::Shader::TextureType::TextureCube: | ||
| 38 | return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap; | ||
| 39 | default: | ||
| 40 | UNREACHABLE(); | ||
| 41 | return SurfaceTarget::Texture2D; | ||
| 42 | } | ||
| 43 | } | ||
| 44 | |||
| 45 | SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) { | ||
| 46 | switch (type) { | ||
| 47 | case Tegra::Shader::ImageType::Texture1D: | ||
| 48 | return SurfaceTarget::Texture1D; | ||
| 49 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 50 | return SurfaceTarget::TextureBuffer; | ||
| 51 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 52 | return SurfaceTarget::Texture1DArray; | ||
| 53 | case Tegra::Shader::ImageType::Texture2D: | ||
| 54 | return SurfaceTarget::Texture2D; | ||
| 55 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 56 | return SurfaceTarget::Texture2DArray; | ||
| 57 | case Tegra::Shader::ImageType::Texture3D: | ||
| 58 | return SurfaceTarget::Texture3D; | ||
| 59 | default: | ||
| 60 | UNREACHABLE(); | ||
| 61 | return SurfaceTarget::Texture2D; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { | ||
| 66 | return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); | ||
| 67 | } | ||
| 68 | |||
| 69 | } // Anonymous namespace | ||
| 70 | |||
| 71 | SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table, | ||
| 72 | const Tegra::Texture::TICEntry& tic, | ||
| 73 | const VideoCommon::Shader::Sampler& entry) { | ||
| 74 | SurfaceParams params; | ||
| 75 | params.is_tiled = tic.IsTiled(); | ||
| 76 | params.srgb_conversion = tic.IsSrgbConversionEnabled(); | ||
| 77 | params.block_width = params.is_tiled ? tic.BlockWidth() : 0; | ||
| 78 | params.block_height = params.is_tiled ? tic.BlockHeight() : 0; | ||
| 79 | params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; | ||
| 80 | params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; | ||
| 81 | params.pixel_format = lookup_table.GetPixelFormat( | ||
| 82 | tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); | ||
| 83 | params.type = GetFormatType(params.pixel_format); | ||
| 84 | if (entry.is_shadow && params.type == SurfaceType::ColorTexture) { | ||
| 85 | switch (params.pixel_format) { | ||
| 86 | case PixelFormat::R16_UNORM: | ||
| 87 | case PixelFormat::R16_FLOAT: | ||
| 88 | params.pixel_format = PixelFormat::D16_UNORM; | ||
| 89 | break; | ||
| 90 | case PixelFormat::R32_FLOAT: | ||
| 91 | params.pixel_format = PixelFormat::D32_FLOAT; | ||
| 92 | break; | ||
| 93 | default: | ||
| 94 | UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", | ||
| 95 | static_cast<u32>(params.pixel_format)); | ||
| 96 | } | ||
| 97 | params.type = GetFormatType(params.pixel_format); | ||
| 98 | } | ||
| 99 | // TODO: on 1D buffers we should use the TIC info. | ||
| 100 | if (tic.IsBuffer()) { | ||
| 101 | params.target = SurfaceTarget::TextureBuffer; | ||
| 102 | params.width = tic.Width(); | ||
| 103 | params.pitch = params.width * params.GetBytesPerPixel(); | ||
| 104 | params.height = 1; | ||
| 105 | params.depth = 1; | ||
| 106 | params.num_levels = 1; | ||
| 107 | params.emulated_levels = 1; | ||
| 108 | params.is_layered = false; | ||
| 109 | } else { | ||
| 110 | params.target = TextureTypeToSurfaceTarget(entry.type, entry.is_array); | ||
| 111 | params.width = tic.Width(); | ||
| 112 | params.height = tic.Height(); | ||
| 113 | params.depth = tic.Depth(); | ||
| 114 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); | ||
| 115 | if (params.target == SurfaceTarget::TextureCubemap || | ||
| 116 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 117 | params.depth *= 6; | ||
| 118 | } | ||
| 119 | params.num_levels = tic.max_mip_level + 1; | ||
| 120 | params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); | ||
| 121 | params.is_layered = params.IsLayered(); | ||
| 122 | } | ||
| 123 | return params; | ||
| 124 | } | ||
| 125 | |||
| 126 | SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table, | ||
| 127 | const Tegra::Texture::TICEntry& tic, | ||
| 128 | const VideoCommon::Shader::Image& entry) { | ||
| 129 | SurfaceParams params; | ||
| 130 | params.is_tiled = tic.IsTiled(); | ||
| 131 | params.srgb_conversion = tic.IsSrgbConversionEnabled(); | ||
| 132 | params.block_width = params.is_tiled ? tic.BlockWidth() : 0; | ||
| 133 | params.block_height = params.is_tiled ? tic.BlockHeight() : 0; | ||
| 134 | params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; | ||
| 135 | params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; | ||
| 136 | params.pixel_format = lookup_table.GetPixelFormat( | ||
| 137 | tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); | ||
| 138 | params.type = GetFormatType(params.pixel_format); | ||
| 139 | params.target = ImageTypeToSurfaceTarget(entry.type); | ||
| 140 | // TODO: on 1D buffers we should use the TIC info. | ||
| 141 | if (tic.IsBuffer()) { | ||
| 142 | params.target = SurfaceTarget::TextureBuffer; | ||
| 143 | params.width = tic.Width(); | ||
| 144 | params.pitch = params.width * params.GetBytesPerPixel(); | ||
| 145 | params.height = 1; | ||
| 146 | params.depth = 1; | ||
| 147 | params.num_levels = 1; | ||
| 148 | params.emulated_levels = 1; | ||
| 149 | params.is_layered = false; | ||
| 150 | } else { | ||
| 151 | params.width = tic.Width(); | ||
| 152 | params.height = tic.Height(); | ||
| 153 | params.depth = tic.Depth(); | ||
| 154 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); | ||
| 155 | if (params.target == SurfaceTarget::TextureCubemap || | ||
| 156 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 157 | params.depth *= 6; | ||
| 158 | } | ||
| 159 | params.num_levels = tic.max_mip_level + 1; | ||
| 160 | params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); | ||
| 161 | params.is_layered = params.IsLayered(); | ||
| 162 | } | ||
| 163 | return params; | ||
| 164 | } | ||
| 165 | |||
| 166 | SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) { | ||
| 167 | const auto& regs = maxwell3d.regs; | ||
| 168 | const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); | ||
| 169 | const bool is_layered = regs.zeta_layers > 1 && block_depth == 0; | ||
| 170 | const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); | ||
| 171 | return { | ||
| 172 | .is_tiled = regs.zeta.memory_layout.type == | ||
| 173 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear, | ||
| 174 | .srgb_conversion = false, | ||
| 175 | .is_layered = is_layered, | ||
| 176 | .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U), | ||
| 177 | .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U), | ||
| 178 | .block_depth = block_depth, | ||
| 179 | .tile_width_spacing = 1, | ||
| 180 | .width = regs.zeta_width, | ||
| 181 | .height = regs.zeta_height, | ||
| 182 | .depth = is_layered ? regs.zeta_layers.Value() : 1U, | ||
| 183 | .pitch = 0, | ||
| 184 | .num_levels = 1, | ||
| 185 | .emulated_levels = 1, | ||
| 186 | .pixel_format = pixel_format, | ||
| 187 | .type = GetFormatType(pixel_format), | ||
| 188 | .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D, | ||
| 189 | }; | ||
| 190 | } | ||
| 191 | |||
| 192 | SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, | ||
| 193 | std::size_t index) { | ||
| 194 | const auto& config{maxwell3d.regs.rt[index]}; | ||
| 195 | SurfaceParams params; | ||
| 196 | params.is_tiled = | ||
| 197 | config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | ||
| 198 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || | ||
| 199 | config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB; | ||
| 200 | params.block_width = config.memory_layout.block_width; | ||
| 201 | params.block_height = config.memory_layout.block_height; | ||
| 202 | params.block_depth = config.memory_layout.block_depth; | ||
| 203 | params.tile_width_spacing = 1; | ||
| 204 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 205 | params.type = GetFormatType(params.pixel_format); | ||
| 206 | if (params.is_tiled) { | ||
| 207 | params.pitch = 0; | ||
| 208 | params.width = config.width; | ||
| 209 | } else { | ||
| 210 | const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; | ||
| 211 | params.pitch = config.width; | ||
| 212 | params.width = params.pitch / bpp; | ||
| 213 | } | ||
| 214 | params.height = config.height; | ||
| 215 | params.num_levels = 1; | ||
| 216 | params.emulated_levels = 1; | ||
| 217 | |||
| 218 | if (config.memory_layout.is_3d != 0) { | ||
| 219 | params.depth = config.layers.Value(); | ||
| 220 | params.is_layered = false; | ||
| 221 | params.target = SurfaceTarget::Texture3D; | ||
| 222 | } else if (config.layers > 1) { | ||
| 223 | params.depth = config.layers.Value(); | ||
| 224 | params.is_layered = true; | ||
| 225 | params.target = SurfaceTarget::Texture2DArray; | ||
| 226 | } else { | ||
| 227 | params.depth = 1; | ||
| 228 | params.is_layered = false; | ||
| 229 | params.target = SurfaceTarget::Texture2D; | ||
| 230 | } | ||
| 231 | return params; | ||
| 232 | } | ||
| 233 | |||
| 234 | SurfaceParams SurfaceParams::CreateForFermiCopySurface( | ||
| 235 | const Tegra::Engines::Fermi2D::Regs::Surface& config) { | ||
| 236 | const bool is_tiled = !config.linear; | ||
| 237 | const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 238 | |||
| 239 | SurfaceParams params{ | ||
| 240 | .is_tiled = is_tiled, | ||
| 241 | .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || | ||
| 242 | config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB, | ||
| 243 | .is_layered = false, | ||
| 244 | .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U, | ||
| 245 | .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U, | ||
| 246 | .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U, | ||
| 247 | .tile_width_spacing = 1, | ||
| 248 | .width = config.width, | ||
| 249 | .height = config.height, | ||
| 250 | .depth = 1, | ||
| 251 | .pitch = config.pitch, | ||
| 252 | .num_levels = 1, | ||
| 253 | .emulated_levels = 1, | ||
| 254 | .pixel_format = pixel_format, | ||
| 255 | .type = GetFormatType(pixel_format), | ||
| 256 | // TODO(Rodrigo): Try to guess texture arrays from parameters | ||
| 257 | .target = SurfaceTarget::Texture2D, | ||
| 258 | }; | ||
| 259 | |||
| 260 | params.is_layered = params.IsLayered(); | ||
| 261 | return params; | ||
| 262 | } | ||
| 263 | |||
| 264 | VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget( | ||
| 265 | const VideoCommon::Shader::Sampler& entry) { | ||
| 266 | return TextureTypeToSurfaceTarget(entry.type, entry.is_array); | ||
| 267 | } | ||
| 268 | |||
| 269 | VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget( | ||
| 270 | const VideoCommon::Shader::Image& entry) { | ||
| 271 | return ImageTypeToSurfaceTarget(entry.type); | ||
| 272 | } | ||
| 273 | |||
| 274 | bool SurfaceParams::IsLayered() const { | ||
| 275 | switch (target) { | ||
| 276 | case SurfaceTarget::Texture1DArray: | ||
| 277 | case SurfaceTarget::Texture2DArray: | ||
| 278 | case SurfaceTarget::TextureCubemap: | ||
| 279 | case SurfaceTarget::TextureCubeArray: | ||
| 280 | return true; | ||
| 281 | default: | ||
| 282 | return false; | ||
| 283 | } | ||
| 284 | } | ||
| 285 | |||
| 286 | // Auto block resizing algorithm from: | ||
| 287 | // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c | ||
| 288 | u32 SurfaceParams::GetMipBlockHeight(u32 level) const { | ||
| 289 | if (level == 0) { | ||
| 290 | return this->block_height; | ||
| 291 | } | ||
| 292 | |||
| 293 | const u32 height_new{GetMipHeight(level)}; | ||
| 294 | const u32 default_block_height{GetDefaultBlockHeight()}; | ||
| 295 | const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height}; | ||
| 296 | const u32 block_height_new = Common::Log2Ceil32(blocks_in_y); | ||
| 297 | return std::clamp(block_height_new, 3U, 7U) - 3U; | ||
| 298 | } | ||
| 299 | |||
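The value returned here is the log2 of the block height in GOBs, clamped to the hardware range [0, 4]. A worked pass with assumed numbers:

    // Worked example (illustrative): uncompressed format (default block height 1),
    // surface height 800, queried at level 3:
    //   height_new       = max(1, 800 >> 3)   = 100
    //   blocks_in_y      = (100 + 1 - 1) / 1  = 100
    //   block_height_new = Log2Ceil32(100)    = 7
    //   result           = clamp(7, 3, 7) - 3 = 4   (blocks are 2^4 = 16 GOBs tall)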
| 300 | u32 SurfaceParams::GetMipBlockDepth(u32 level) const { | ||
| 301 | if (level == 0) { | ||
| 302 | return this->block_depth; | ||
| 303 | } | ||
| 304 | if (is_layered) { | ||
| 305 | return 0; | ||
| 306 | } | ||
| 307 | |||
| 308 | const u32 depth_new{GetMipDepth(level)}; | ||
| 309 | const u32 block_depth_new = Common::Log2Ceil32(depth_new); | ||
| 310 | if (block_depth_new > 4) { | ||
| 311 | return 5 - (GetMipBlockHeight(level) >= 2); | ||
| 312 | } | ||
| 313 | return block_depth_new; | ||
| 314 | } | ||
| 315 | |||
| 316 | std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { | ||
| 317 | std::size_t offset = 0; | ||
| 318 | for (u32 i = 0; i < level; i++) { | ||
| 319 | offset += GetInnerMipmapMemorySize(i, false, false); | ||
| 320 | } | ||
| 321 | return offset; | ||
| 322 | } | ||
| 323 | |||
| 324 | std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const { | ||
| 325 | std::size_t offset = 0; | ||
| 326 | if (is_converted) { | ||
| 327 | for (u32 i = 0; i < level; ++i) { | ||
| 328 | offset += GetConvertedMipmapSize(i) * GetNumLayers(); | ||
| 329 | } | ||
| 330 | } else { | ||
| 331 | for (u32 i = 0; i < level; ++i) { | ||
| 332 | offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); | ||
| 333 | } | ||
| 334 | } | ||
| 335 | return offset; | ||
| 336 | } | ||
| 337 | |||
| 338 | std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { | ||
| 339 | constexpr std::size_t rgba8_bpp = 4ULL; | ||
| 340 | const std::size_t mip_width = GetMipWidth(level); | ||
| 341 | const std::size_t mip_height = GetMipHeight(level); | ||
| 342 | const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level); | ||
| 343 | return mip_width * mip_height * mip_depth * rgba8_bpp; | ||
| 344 | } | ||
| 345 | |||
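Converted sizes assume the level has been decoded to RGBA8 at 4 bytes per pixel, which is far larger than the compressed guest data; the numbers below are assumed for illustration:

    // Worked example (illustrative): a 128x128 ASTC 8x8 level decoded to RGBA8:
    //   converted size = 128 * 128 * 1 * 4 = 64 KiB
    // versus 16 * 16 blocks * 16 bytes = 4 KiB of raw ASTC data in guest memory,
    // which is why converted host offsets diverge from unconverted ones.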
| 346 | std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { | ||
| 347 | std::size_t size = 0; | ||
| 348 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 349 | size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); | ||
| 350 | } | ||
| 351 | if (is_tiled && is_layered) { | ||
| 352 | return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); | ||
| 353 | } | ||
| 354 | return size; | ||
| 355 | } | ||
| 356 | |||
| 357 | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, | ||
| 358 | bool uncompressed) const { | ||
| 359 | const u32 mip_width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; | ||
| 360 | const u32 mip_height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; | ||
| 361 | const u32 mip_depth{is_layered ? 1U : GetMipDepth(level)}; | ||
| 362 | if (is_tiled) { | ||
| 363 | return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), mip_width, | ||
| 364 | mip_height, mip_depth, GetMipBlockHeight(level), | ||
| 365 | GetMipBlockDepth(level)); | ||
| 366 | } else if (as_host_size || IsBuffer()) { | ||
| 367 | return GetBytesPerPixel() * mip_width * mip_height * mip_depth; | ||
| 368 | } else { | ||
| 369 | // Linear texture case | ||
| 370 | return pitch * mip_height * mip_depth; | ||
| 371 | } | ||
| 372 | } | ||
| 373 | |||
| 374 | bool SurfaceParams::operator==(const SurfaceParams& rhs) const { | ||
| 375 | return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, | ||
| 376 | height, depth, pitch, num_levels, pixel_format, type, target) == | ||
| 377 | std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, | ||
| 378 | rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, | ||
| 379 | rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target); | ||
| 380 | } | ||
| 381 | |||
| 382 | std::string SurfaceParams::TargetName() const { | ||
| 383 | switch (target) { | ||
| 384 | case SurfaceTarget::Texture1D: | ||
| 385 | return "1D"; | ||
| 386 | case SurfaceTarget::TextureBuffer: | ||
| 387 | return "TexBuffer"; | ||
| 388 | case SurfaceTarget::Texture2D: | ||
| 389 | return "2D"; | ||
| 390 | case SurfaceTarget::Texture3D: | ||
| 391 | return "3D"; | ||
| 392 | case SurfaceTarget::Texture1DArray: | ||
| 393 | return "1DArray"; | ||
| 394 | case SurfaceTarget::Texture2DArray: | ||
| 395 | return "2DArray"; | ||
| 396 | case SurfaceTarget::TextureCubemap: | ||
| 397 | return "Cube"; | ||
| 398 | case SurfaceTarget::TextureCubeArray: | ||
| 399 | return "CubeArray"; | ||
| 400 | default: | ||
| 401 | LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target); | ||
| 402 | UNREACHABLE(); | ||
| 403 | return fmt::format("TUK({})", target); | ||
| 404 | } | ||
| 405 | } | ||
| 406 | |||
| 407 | u32 SurfaceParams::GetBlockSize() const { | ||
| 408 | const u32 x = 64U << block_width; | ||
| 409 | const u32 y = 8U << block_height; | ||
| 410 | const u32 z = 1U << block_depth; | ||
| 411 | return x * y * z; | ||
| 412 | } | ||
| 413 | |||
| 414 | std::pair<u32, u32> SurfaceParams::GetBlockXY() const { | ||
| 415 | const u32 x_pixels = 64U / GetBytesPerPixel(); | ||
| 416 | const u32 x = x_pixels << block_width; | ||
| 417 | const u32 y = 8U << block_height; | ||
| 418 | return {x, y}; | ||
| 419 | } | ||
| 420 | |||
| 421 | std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const { | ||
| 422 | const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | ||
| 423 | const u32 block_size = GetBlockSize(); | ||
| 424 | const u32 block_index = offset / block_size; | ||
| 425 | const u32 gob_offset = offset % block_size; | ||
| 426 | const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE); | ||
| 427 | const u32 x_gob_pixels = 64U / GetBytesPerPixel(); | ||
| 428 | const u32 x_block_pixels = x_gob_pixels << block_width; | ||
| 429 | const u32 y_block_pixels = 8U << block_height; | ||
| 430 | const u32 z_block_pixels = 1U << block_depth; | ||
| 431 | const u32 x_blocks = div_ceil(width, x_block_pixels); | ||
| 432 | const u32 y_blocks = div_ceil(height, y_block_pixels); | ||
| 433 | const u32 z_blocks = div_ceil(depth, z_block_pixels); | ||
| 434 | const u32 base_x = block_index % x_blocks; | ||
| 435 | const u32 base_y = (block_index / x_blocks) % y_blocks; | ||
| 436 | const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks; | ||
| 437 | u32 x = base_x * x_block_pixels; | ||
| 438 | u32 y = base_y * y_block_pixels; | ||
| 439 | u32 z = base_z * z_block_pixels; | ||
| 440 | z += gob_index >> block_height; | ||
| 441 | y += (gob_index * 8U) % y_block_pixels; | ||
| 442 | return {x, y, z}; | ||
| 443 | } | ||
| 444 | |||
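A worked decode makes the block arithmetic above concrete; every parameter below is assumed for illustration:

    // Worked example for GetBlockOffsetXYZ (illustrative values):
    //   bytes per pixel 4, block_width 0, block_height 4, block_depth 0,
    //   width 512, height 512, depth 1:
    //   block size     = (64 << 0) * (8 << 4) * (1 << 0) = 8192 bytes
    //   x_block_pixels = (64 / 4) << 0 = 16, y_block_pixels = 8 << 4 = 128
    //   x_blocks       = ceil(512 / 16) = 32, y_blocks = ceil(512 / 128) = 4
    // For offset = 100000:
    //   block_index = 100000 / 8192 = 12, gob_offset = 1696, gob_index = 3
    //   base (x, y, z) = (12 % 32, (12 / 32) % 4, 0) -> pixels (192, 0, 0)
    //   z += 3 >> 4 = 0;  y += (3 * 8) % 128 = 24
    //   result: {192, 24, 0}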
| 445 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h deleted file mode 100644 index 4466c3c34..000000000 --- a/src/video_core/texture_cache/surface_params.h +++ /dev/null | |||
| @@ -1,294 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <utility> | ||
| 8 | |||
| 9 | #include "common/alignment.h" | ||
| 10 | #include "common/bit_util.h" | ||
| 11 | #include "common/cityhash.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/engines/fermi_2d.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | #include "video_core/surface.h" | ||
| 17 | #include "video_core/textures/decoders.h" | ||
| 18 | |||
| 19 | namespace VideoCommon { | ||
| 20 | |||
| 21 | class FormatLookupTable; | ||
| 22 | |||
| 23 | class SurfaceParams { | ||
| 24 | public: | ||
| 25 | /// Creates SurfaceParams from a texture configuration. | ||
| 26 | static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table, | ||
| 27 | const Tegra::Texture::TICEntry& tic, | ||
| 28 | const VideoCommon::Shader::Sampler& entry); | ||
| 29 | |||
| 30 | /// Creates SurfaceParams from an image configuration. | ||
| 31 | static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table, | ||
| 32 | const Tegra::Texture::TICEntry& tic, | ||
| 33 | const VideoCommon::Shader::Image& entry); | ||
| 34 | |||
| 35 | /// Creates SurfaceParams for a depth buffer configuration. | ||
| 36 | static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d); | ||
| 37 | |||
| 38 | /// Creates SurfaceParams from a framebuffer configuration. | ||
| 39 | static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, | ||
| 40 | std::size_t index); | ||
| 41 | |||
| 42 | /// Creates SurfaceParams from a Fermi2D surface configuration. | ||
| 43 | static SurfaceParams CreateForFermiCopySurface( | ||
| 44 | const Tegra::Engines::Fermi2D::Regs::Surface& config); | ||
| 45 | |||
| 46 | /// Obtains the texture target from a shader's sampler entry. | ||
| 47 | static VideoCore::Surface::SurfaceTarget ExpectedTarget( | ||
| 48 | const VideoCommon::Shader::Sampler& entry); | ||
| 49 | |||
| 50 | /// Obtains the texture target from a shader's image entry. | ||
| 51 | static VideoCore::Surface::SurfaceTarget ExpectedTarget( | ||
| 52 | const VideoCommon::Shader::Image& entry); | ||
| 53 | |||
| 54 | std::size_t Hash() const { | ||
| 55 | return static_cast<std::size_t>( | ||
| 56 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this))); | ||
| 57 | } | ||
| 58 | |||
| 59 | bool operator==(const SurfaceParams& rhs) const; | ||
| 60 | |||
| 61 | bool operator!=(const SurfaceParams& rhs) const { | ||
| 62 | return !operator==(rhs); | ||
| 63 | } | ||
| 64 | |||
| 65 | std::size_t GetGuestSizeInBytes() const { | ||
| 66 | return GetInnerMemorySize(false, false, false); | ||
| 67 | } | ||
| 68 | |||
| 69 | std::size_t GetHostSizeInBytes(bool is_converted) const { | ||
| 70 | if (!is_converted) { | ||
| 71 | return GetInnerMemorySize(true, false, false); | ||
| 72 | } | ||
| 73 | // ASTC is decompressed in software and emulated as RGBA8 | ||
| 74 | std::size_t host_size_in_bytes = 0; | ||
| 75 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 76 | host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers(); | ||
| 77 | } | ||
| 78 | return host_size_in_bytes; | ||
| 79 | } | ||
| 80 | |||
| 81 | u32 GetBlockAlignedWidth() const { | ||
| 82 | return Common::AlignUp(width, 64 / GetBytesPerPixel()); | ||
| 83 | } | ||
| 84 | |||
| 85 | /// Returns the width of a given mipmap level. | ||
| 86 | u32 GetMipWidth(u32 level) const { | ||
| 87 | return std::max(1U, width >> level); | ||
| 88 | } | ||
| 89 | |||
| 90 | /// Returns the height of a given mipmap level. | ||
| 91 | u32 GetMipHeight(u32 level) const { | ||
| 92 | return std::max(1U, height >> level); | ||
| 93 | } | ||
| 94 | |||
| 95 | /// Returns the depth of a given mipmap level. | ||
| 96 | u32 GetMipDepth(u32 level) const { | ||
| 97 | return is_layered ? depth : std::max(1U, depth >> level); | ||
| 98 | } | ||
| 99 | |||
| 100 | /// Returns the block height of a given mipmap level. | ||
| 101 | u32 GetMipBlockHeight(u32 level) const; | ||
| 102 | |||
| 103 | /// Returns the block depth of a given mipmap level. | ||
| 104 | u32 GetMipBlockDepth(u32 level) const; | ||
| 105 | |||
| 106 | /// Returns the best possible row/pitch alignment for the surface. | ||
| 107 | u32 GetRowAlignment(u32 level, bool is_converted) const { | ||
| 108 | const u32 bpp = is_converted ? 4 : GetBytesPerPixel(); | ||
| 109 | return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); | ||
| 110 | } | ||
| 111 | |||
| 112 | /// Returns the offset in bytes in guest memory of a given mipmap level. | ||
| 113 | std::size_t GetGuestMipmapLevelOffset(u32 level) const; | ||
| 114 | |||
| 115 | /// Returns the offset in bytes in host memory (linear) of a given mipmap level. | ||
| 116 | std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const; | ||
| 117 | |||
| 118 | /// Returns the size in bytes in guest memory of a given mipmap level. | ||
| 119 | std::size_t GetGuestMipmapSize(u32 level) const { | ||
| 120 | return GetInnerMipmapMemorySize(level, false, false); | ||
| 121 | } | ||
| 122 | |||
| 123 | /// Returns the size in bytes in host memory (linear) of a given mipmap level. | ||
| 124 | std::size_t GetHostMipmapSize(u32 level) const { | ||
| 125 | return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); | ||
| 126 | } | ||
| 127 | |||
| 128 | std::size_t GetConvertedMipmapSize(u32 level) const; | ||
| 129 | |||
| 130 | /// Gets this texture's Tegra block size in the guest memory layout. | ||
| 131 | u32 GetBlockSize() const; | ||
| 132 | |||
| 133 | /// Gets the maximum X and Y pixel sizes of a single block. | ||
| 134 | std::pair<u32, u32> GetBlockXY() const; | ||
| 135 | |||
| 136 | /// Gets the x, y, z coordinates corresponding to a memory offset. | ||
| 137 | std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const; | ||
| 138 | |||
| 139 | /// Returns the size of a layer in bytes in guest memory. | ||
| 140 | std::size_t GetGuestLayerSize() const { | ||
| 141 | return GetLayerSize(false, false); | ||
| 142 | } | ||
| 143 | |||
| 144 | /// Returns the size of a layer in bytes in host memory for a given mipmap level. | ||
| 145 | std::size_t GetHostLayerSize(u32 level) const { | ||
| 146 | ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D); | ||
| 147 | return GetInnerMipmapMemorySize(level, true, false); | ||
| 148 | } | ||
| 149 | |||
| 150 | /// Returns the maximum number of mipmap levels the texture can have on the host GPU. | ||
| 151 | u32 MaxPossibleMipmap() const { | ||
| 152 | const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; | ||
| 153 | const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; | ||
| 154 | const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h); | ||
| 155 | if (target != VideoCore::Surface::SurfaceTarget::Texture3D) | ||
| 156 | return max_mipmap; | ||
| 157 | return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); | ||
| 158 | } | ||
| 159 | |||
| 160 | /// Returns whether the guest surface is a compressed surface. | ||
| 161 | bool IsCompressed() const { | ||
| 162 | return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; | ||
| 163 | } | ||
| 164 | |||
| 165 | /// Returns the default block width. | ||
| 166 | u32 GetDefaultBlockWidth() const { | ||
| 167 | return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); | ||
| 168 | } | ||
| 169 | |||
| 170 | /// Returns the default block height. | ||
| 171 | u32 GetDefaultBlockHeight() const { | ||
| 172 | return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); | ||
| 173 | } | ||
| 174 | |||
| 175 | /// Returns the bits per pixel. | ||
| 176 | u32 GetBitsPerPixel() const { | ||
| 177 | return VideoCore::Surface::GetFormatBpp(pixel_format); | ||
| 178 | } | ||
| 179 | |||
| 180 | /// Returns the bytes per pixel. | ||
| 181 | u32 GetBytesPerPixel() const { | ||
| 182 | return VideoCore::Surface::GetBytesPerPixel(pixel_format); | ||
| 183 | } | ||
| 184 | |||
| 185 | /// Returns true if the pixel format is a depth and/or stencil format. | ||
| 186 | bool IsPixelFormatZeta() const { | ||
| 187 | return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && | ||
| 188 | pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; | ||
| 189 | } | ||
| 190 | |||
| 191 | /// Returns whether the surface is a TextureBuffer type of surface. | ||
| 192 | bool IsBuffer() const { | ||
| 193 | return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; | ||
| 194 | } | ||
| 195 | |||
| 196 | /// Returns the number of layers in the surface. | ||
| 197 | std::size_t GetNumLayers() const { | ||
| 198 | return is_layered ? depth : 1; | ||
| 199 | } | ||
| 200 | |||
| 201 | /// Returns the debug name of the texture for use in graphic debuggers. | ||
| 202 | std::string TargetName() const; | ||
| 203 | |||
| 204 | // Helper used for out-of-class size calculations. | ||
| 205 | static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, | ||
| 206 | const u32 block_depth) { | ||
| 207 | return Common::AlignBits(out_size, | ||
| 208 | Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); | ||
| 209 | } | ||
| 210 | |||
| 211 | /// Converts a width from one surface format to another, representing the | ||
| 212 | /// equivalent value between compressed and non-compressed textures. | ||
| 213 | static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, | ||
| 214 | VideoCore::Surface::PixelFormat pixel_format_to) { | ||
| 215 | const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); | ||
| 216 | const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); | ||
| 217 | return (width * bw2 + bw1 - 1) / bw1; | ||
| 218 | } | ||
| 219 | |||
| 220 | /// Converts a height from one surface format to another, representing the | ||
| 221 | /// equivalent value between compressed and non-compressed textures. | ||
| 222 | static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, | ||
| 223 | VideoCore::Surface::PixelFormat pixel_format_to) { | ||
| 224 | const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); | ||
| 225 | const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); | ||
| 226 | return (height * bh2 + bh1 - 1) / bh1; | ||
| 227 | } | ||
| 228 | |||
| 229 | // Finds the maximum possible width between two 2D layers of different formats | ||
| 230 | static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, | ||
| 231 | const u32 src_level, const u32 dst_level) { | ||
| 232 | const u32 bw1 = src_params.GetDefaultBlockWidth(); | ||
| 233 | const u32 bw2 = dst_params.GetDefaultBlockWidth(); | ||
| 234 | const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; | ||
| 235 | const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; | ||
| 236 | return std::min(t_src_width, t_dst_width); | ||
| 237 | } | ||
| 238 | |||
| 239 | // Finds the maximum possible height between two 2D layers of different formats | ||
| 240 | static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, | ||
| 241 | const u32 src_level, const u32 dst_level) { | ||
| 242 | const u32 bh1 = src_params.GetDefaultBlockHeight(); | ||
| 243 | const u32 bh2 = dst_params.GetDefaultBlockHeight(); | ||
| 244 | const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; | ||
| 245 | const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; | ||
| 246 | return std::min(t_src_height, t_dst_height); | ||
| 247 | } | ||
| 248 | |||
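These helpers round so that a compressed surface and its uncompressed alias agree on extents. Worked numbers, assuming a 4x4-block compressed format against an uncompressed one:

    // ConvertWidth between block widths 4 (compressed) and 1 (uncompressed):
    //   ConvertWidth(60, compressed, uncompressed) = (60 * 1 + 3) / 4 = 15
    //   ConvertWidth(15, uncompressed, compressed) = (15 * 4 + 0) / 1 = 60
    // IntersectWidth of a 60-pixel compressed mip and a 17-block uncompressed mip:
    //   t_src_width = (60 * 1 + 3) / 4 = 15, t_dst_width = (17 * 4 + 0) / 1 = 68
    //   result = min(15, 68) = 15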
| 249 | bool is_tiled; | ||
| 250 | bool srgb_conversion; | ||
| 251 | bool is_layered; | ||
| 252 | u32 block_width; | ||
| 253 | u32 block_height; | ||
| 254 | u32 block_depth; | ||
| 255 | u32 tile_width_spacing; | ||
| 256 | u32 width; | ||
| 257 | u32 height; | ||
| 258 | u32 depth; | ||
| 259 | u32 pitch; | ||
| 260 | u32 num_levels; | ||
| 261 | u32 emulated_levels; | ||
| 262 | VideoCore::Surface::PixelFormat pixel_format; | ||
| 263 | VideoCore::Surface::SurfaceType type; | ||
| 264 | VideoCore::Surface::SurfaceTarget target; | ||
| 265 | |||
| 266 | private: | ||
| 267 | /// Returns the size of a given mipmap level inside a layer. | ||
| 268 | std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; | ||
| 269 | |||
| 270 | /// Returns the size of all mipmap levels and aligns as needed. | ||
| 271 | std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { | ||
| 272 | return GetLayerSize(as_host_size, uncompressed) * | ||
| 273 | (layer_only ? 1U : (is_layered ? depth : 1U)); | ||
| 274 | } | ||
| 275 | |||
| 276 | /// Returns the size of a layer. | ||
| 277 | std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; | ||
| 278 | |||
| 279 | /// Returns true if these parameters are from a layered surface. | ||
| 280 | bool IsLayered() const; | ||
| 281 | }; | ||
| 282 | |||
| 283 | } // namespace VideoCommon | ||
| 284 | |||
| 285 | namespace std { | ||
| 286 | |||
| 287 | template <> | ||
| 288 | struct hash<VideoCommon::SurfaceParams> { | ||
| 289 | std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept { | ||
| 290 | return k.Hash(); | ||
| 291 | } | ||
| 292 | }; | ||
| 293 | |||
| 294 | } // namespace std | ||
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp deleted file mode 100644 index 6b5f5984b..000000000 --- a/src/video_core/texture_cache/surface_view.cpp +++ /dev/null | |||
| @@ -1,27 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/texture_cache/surface_view.h" | ||
| 9 | |||
| 10 | namespace VideoCommon { | ||
| 11 | |||
| 12 | std::size_t ViewParams::Hash() const { | ||
| 13 | return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^ | ||
| 14 | (static_cast<std::size_t>(base_level) << 24) ^ | ||
| 15 | (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36); | ||
| 16 | } | ||
| 17 | |||
| 18 | bool ViewParams::operator==(const ViewParams& rhs) const { | ||
| 19 | return std::tie(base_layer, num_layers, base_level, num_levels, target) == | ||
| 20 | std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); | ||
| 21 | } | ||
| 22 | |||
| 23 | bool ViewParams::operator!=(const ViewParams& rhs) const { | ||
| 24 | return !operator==(rhs); | ||
| 25 | } | ||
| 26 | |||
| 27 | } // namespace VideoCommon | ||
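The deleted Hash() packs the five view fields into one word with shifts and XORs, implicitly budgeting 16 bits for base_layer, 8 each for num_layers and base_level, and 4 for num_levels; the 32- and 36-bit shifts only behave as intended when std::size_t is 64 bits wide. A sketch of the same packing with explicit masks (the original XORs unmasked values, so oversized fields can bleed into their neighbors):

    // Sketch only: explicit-width variant of the packing above,
    // assuming a 64-bit std::size_t and fields that fit their budgets.
    std::size_t PackViewParams(u32 base_layer, u32 num_layers, u32 base_level,
                               u32 num_levels, u32 target) {
        return static_cast<std::size_t>(base_layer & 0xffff) |
               (static_cast<std::size_t>(num_layers & 0xff) << 16) |
               (static_cast<std::size_t>(base_level & 0xff) << 24) |
               (static_cast<std::size_t>(num_levels & 0xf) << 32) |
               (static_cast<std::size_t>(target) << 36);
    }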
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h deleted file mode 100644 index 199f72732..000000000 --- a/src/video_core/texture_cache/surface_view.h +++ /dev/null | |||
| @@ -1,68 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <functional> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/texture_cache/surface_params.h" | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | struct ViewParams { | ||
| 16 | constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target_, u32 base_layer_, | ||
| 17 | u32 num_layers_, u32 base_level_, u32 num_levels_) | ||
| 18 | : target{target_}, base_layer{base_layer_}, num_layers{num_layers_}, | ||
| 19 | base_level{base_level_}, num_levels{num_levels_} {} | ||
| 20 | |||
| 21 | std::size_t Hash() const; | ||
| 22 | |||
| 23 | bool operator==(const ViewParams& rhs) const; | ||
| 24 | bool operator!=(const ViewParams& rhs) const; | ||
| 25 | |||
| 26 | bool IsLayered() const { | ||
| 27 | switch (target) { | ||
| 28 | case VideoCore::Surface::SurfaceTarget::Texture1DArray: | ||
| 29 | case VideoCore::Surface::SurfaceTarget::Texture2DArray: | ||
| 30 | case VideoCore::Surface::SurfaceTarget::TextureCubemap: | ||
| 31 | case VideoCore::Surface::SurfaceTarget::TextureCubeArray: | ||
| 32 | return true; | ||
| 33 | default: | ||
| 34 | return false; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | VideoCore::Surface::SurfaceTarget target{}; | ||
| 39 | u32 base_layer{}; | ||
| 40 | u32 num_layers{}; | ||
| 41 | u32 base_level{}; | ||
| 42 | u32 num_levels{}; | ||
| 43 | }; | ||
| 44 | |||
| 45 | class ViewBase { | ||
| 46 | public: | ||
| 47 | constexpr explicit ViewBase(const ViewParams& view_params) : params{view_params} {} | ||
| 48 | |||
| 49 | constexpr const ViewParams& GetViewParams() const { | ||
| 50 | return params; | ||
| 51 | } | ||
| 52 | |||
| 53 | protected: | ||
| 54 | ViewParams params; | ||
| 55 | }; | ||
| 56 | |||
| 57 | } // namespace VideoCommon | ||
| 58 | |||
| 59 | namespace std { | ||
| 60 | |||
| 61 | template <> | ||
| 62 | struct hash<VideoCommon::ViewParams> { | ||
| 63 | std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept { | ||
| 64 | return k.Hash(); | ||
| 65 | } | ||
| 66 | }; | ||
| 67 | |||
| 68 | } // namespace std | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 581d8dd5b..968059842 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -6,1298 +6,1449 @@ | |||
| 6 | 6 | ||
| 7 | #include <algorithm> | 7 | #include <algorithm> |
| 8 | #include <array> | 8 | #include <array> |
| 9 | #include <list> | 9 | #include <bit> |
| 10 | #include <memory> | 10 | #include <memory> |
| 11 | #include <mutex> | 11 | #include <mutex> |
| 12 | #include <set> | 12 | #include <optional> |
| 13 | #include <tuple> | 13 | #include <span> |
| 14 | #include <type_traits> | ||
| 14 | #include <unordered_map> | 15 | #include <unordered_map> |
| 16 | #include <utility> | ||
| 15 | #include <vector> | 17 | #include <vector> |
| 16 | 18 | ||
| 17 | #include <boost/container/small_vector.hpp> | 19 | #include <boost/container/small_vector.hpp> |
| 18 | #include <boost/icl/interval_map.hpp> | ||
| 19 | #include <boost/range/iterator_range.hpp> | ||
| 20 | 20 | ||
| 21 | #include "common/assert.h" | 21 | #include "common/alignment.h" |
| 22 | #include "common/common_funcs.h" | ||
| 22 | #include "common/common_types.h" | 23 | #include "common/common_types.h" |
| 23 | #include "common/math_util.h" | 24 | #include "common/logging/log.h" |
| 24 | #include "core/core.h" | ||
| 25 | #include "core/memory.h" | ||
| 26 | #include "core/settings.h" | ||
| 27 | #include "video_core/compatible_formats.h" | 25 | #include "video_core/compatible_formats.h" |
| 26 | #include "video_core/delayed_destruction_ring.h" | ||
| 28 | #include "video_core/dirty_flags.h" | 27 | #include "video_core/dirty_flags.h" |
| 29 | #include "video_core/engines/fermi_2d.h" | 28 | #include "video_core/engines/fermi_2d.h" |
| 29 | #include "video_core/engines/kepler_compute.h" | ||
| 30 | #include "video_core/engines/maxwell_3d.h" | 30 | #include "video_core/engines/maxwell_3d.h" |
| 31 | #include "video_core/gpu.h" | ||
| 32 | #include "video_core/memory_manager.h" | 31 | #include "video_core/memory_manager.h" |
| 33 | #include "video_core/rasterizer_interface.h" | 32 | #include "video_core/rasterizer_interface.h" |
| 34 | #include "video_core/surface.h" | 33 | #include "video_core/surface.h" |
| 35 | #include "video_core/texture_cache/copy_params.h" | 34 | #include "video_core/texture_cache/descriptor_table.h" |
| 36 | #include "video_core/texture_cache/format_lookup_table.h" | 35 | #include "video_core/texture_cache/format_lookup_table.h" |
| 37 | #include "video_core/texture_cache/surface_base.h" | 36 | #include "video_core/texture_cache/formatter.h" |
| 38 | #include "video_core/texture_cache/surface_params.h" | 37 | #include "video_core/texture_cache/image_base.h" |
| 39 | #include "video_core/texture_cache/surface_view.h" | 38 | #include "video_core/texture_cache/image_info.h" |
| 40 | 39 | #include "video_core/texture_cache/image_view_base.h" | |
| 41 | namespace Tegra::Texture { | 40 | #include "video_core/texture_cache/image_view_info.h" |
| 42 | struct FullTextureInfo; | 41 | #include "video_core/texture_cache/render_targets.h" |
| 43 | } | 42 | #include "video_core/texture_cache/samples_helper.h" |
| 44 | 43 | #include "video_core/texture_cache/slot_vector.h" | |
| 45 | namespace VideoCore { | 44 | #include "video_core/texture_cache/types.h" |
| 46 | class RasterizerInterface; | 45 | #include "video_core/texture_cache/util.h" |
| 47 | } | 46 | #include "video_core/textures/texture.h" |
| 48 | 47 | ||
| 49 | namespace VideoCommon { | 48 | namespace VideoCommon { |
| 50 | 49 | ||
| 51 | using VideoCore::Surface::FormatCompatibility; | 50 | using Tegra::Texture::SwizzleSource; |
| 51 | using Tegra::Texture::TextureType; | ||
| 52 | using Tegra::Texture::TICEntry; | ||
| 53 | using Tegra::Texture::TSCEntry; | ||
| 54 | using VideoCore::Surface::GetFormatType; | ||
| 55 | using VideoCore::Surface::IsCopyCompatible; | ||
| 52 | using VideoCore::Surface::PixelFormat; | 56 | using VideoCore::Surface::PixelFormat; |
| 53 | using VideoCore::Surface::SurfaceTarget; | 57 | using VideoCore::Surface::PixelFormatFromDepthFormat; |
| 54 | using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; | 58 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; |
| 59 | using VideoCore::Surface::SurfaceType; | ||
| 55 | 60 | ||
| 56 | template <typename TSurface, typename TView> | 61 | template <class P> |
| 57 | class TextureCache { | 62 | class TextureCache { |
| 58 | using VectorSurface = boost::container::small_vector<TSurface, 1>; | 63 | /// Address shift for caching images into a hash table |
| 64 | static constexpr u64 PAGE_SHIFT = 20; | ||
| 65 | |||
| 66 | /// Enables debugging features for the texture cache | ||
| 67 | static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; | ||
| 68 | /// Implement blits as copies between framebuffers | ||
| 69 | static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; | ||
| 70 | /// True when some copies have to be emulated | ||
| 71 | static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; | ||
| 72 | |||
| 73 | /// Image view ID for null descriptors | ||
| 74 | static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; | ||
| 75 | /// Sampler ID used for invalid (bugged) sampler indices | ||
| 76 | static constexpr SamplerId NULL_SAMPLER_ID{0}; | ||
| 77 | |||
| 78 | using Runtime = typename P::Runtime; | ||
| 79 | using Image = typename P::Image; | ||
| 80 | using ImageAlloc = typename P::ImageAlloc; | ||
| 81 | using ImageView = typename P::ImageView; | ||
| 82 | using Sampler = typename P::Sampler; | ||
| 83 | using Framebuffer = typename P::Framebuffer; | ||
| 84 | |||
| 85 | struct BlitImages { | ||
| 86 | ImageId dst_id; | ||
| 87 | ImageId src_id; | ||
| 88 | PixelFormat dst_format; | ||
| 89 | PixelFormat src_format; | ||
| 90 | }; | ||
| 91 | |||
| 92 | template <typename T> | ||
| 93 | struct IdentityHash { | ||
| 94 | [[nodiscard]] size_t operator()(T value) const noexcept { | ||
| 95 | return static_cast<size_t>(value); | ||
| 96 | } | ||
| 97 | }; | ||
| 59 | 98 | ||
| 60 | public: | 99 | public: |
| 61 | void InvalidateRegion(VAddr addr, std::size_t size) { | 100 | explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, |
| 62 | std::lock_guard lock{mutex}; | 101 | Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); |
| 63 | 102 | ||
| 64 | for (const auto& surface : GetSurfacesInRegion(addr, size)) { | 103 | /// Notify the cache that a new frame has been queued |
| 65 | Unregister(surface); | 104 | void TickFrame(); |
| 66 | } | ||
| 67 | } | ||
| 68 | 105 | ||
| 69 | void OnCPUWrite(VAddr addr, std::size_t size) { | 106 | /// Return a unique mutually exclusive lock for the cache |
| 70 | std::lock_guard lock{mutex}; | 107 | [[nodiscard]] std::unique_lock<std::mutex> AcquireLock(); |
| 71 | 108 | ||
| 72 | for (const auto& surface : GetSurfacesInRegion(addr, size)) { | 109 | /// Return a constant reference to the given image view id |
| 73 | if (surface->IsMemoryMarked()) { | 110 | [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; |
| 74 | UnmarkMemory(surface); | ||
| 75 | surface->SetSyncPending(true); | ||
| 76 | marked_for_unregister.emplace_back(surface); | ||
| 77 | } | ||
| 78 | } | ||
| 79 | } | ||
| 80 | 111 | ||
| 81 | void SyncGuestHost() { | 112 | /// Return a reference to the given image view id |
| 82 | std::lock_guard lock{mutex}; | 113 | [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; |
| 83 | 114 | ||
| 84 | for (const auto& surface : marked_for_unregister) { | 115 | /// Fill image_view_ids with the graphics images in indices |
| 85 | if (surface->IsRegistered()) { | 116 | void FillGraphicsImageViews(std::span<const u32> indices, |
| 86 | surface->SetSyncPending(false); | 117 | std::span<ImageViewId> image_view_ids); |
| 87 | Unregister(surface); | ||
| 88 | } | ||
| 89 | } | ||
| 90 | marked_for_unregister.clear(); | ||
| 91 | } | ||
| 92 | 118 | ||
| 93 | /** | 119 | /// Fill image_view_ids with the compute images in indices |
| 94 | * Guarantees that render targets don't unregister themselves if they | 120 | void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); |
| 95 | * collide. Protection is currently only done on 3D slices. | ||
| 96 | */ | ||
| 97 | void GuardRenderTargets(bool new_guard) { | ||
| 98 | guard_render_targets = new_guard; | ||
| 99 | } | ||
| 100 | 121 | ||
| 101 | void GuardSamplers(bool new_guard) { | 122 | /// Get the sampler from the graphics descriptor table in the specified index |
| 102 | guard_samplers = new_guard; | 123 | Sampler* GetGraphicsSampler(u32 index); |
| 103 | } | ||
| 104 | 124 | ||
| 105 | void FlushRegion(VAddr addr, std::size_t size) { | 125 | /// Get the sampler from the compute descriptor table in the specified index |
| 106 | std::lock_guard lock{mutex}; | 126 | Sampler* GetComputeSampler(u32 index); |
| 107 | 127 | ||
| 108 | auto surfaces = GetSurfacesInRegion(addr, size); | 128 | /// Refresh the state for graphics image view and sampler descriptors |
| 109 | if (surfaces.empty()) { | 129 | void SynchronizeGraphicsDescriptors(); |
| 110 | return; | ||
| 111 | } | ||
| 112 | std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) { | ||
| 113 | return a->GetModificationTick() < b->GetModificationTick(); | ||
| 114 | }); | ||
| 115 | for (const auto& surface : surfaces) { | ||
| 116 | mutex.unlock(); | ||
| 117 | FlushSurface(surface); | ||
| 118 | mutex.lock(); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | 130 | ||
| 122 | bool MustFlushRegion(VAddr addr, std::size_t size) { | 131 | /// Refresh the state for compute image view and sampler descriptors |
| 123 | std::lock_guard lock{mutex}; | 132 | void SynchronizeComputeDescriptors(); |
| 124 | 133 | ||
| 125 | const auto surfaces = GetSurfacesInRegion(addr, size); | 134 | /// Update bound render targets and upload memory if necessary |
| 126 | return std::any_of(surfaces.cbegin(), surfaces.cend(), | 135 | /// @param is_clear True when the render targets are being used for clears |
| 127 | [](const TSurface& surface) { return surface->IsModified(); }); | 136 | void UpdateRenderTargets(bool is_clear); |
| 128 | } | ||
| 129 | 137 | ||
| 130 | TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, | 138 | /// Find a framebuffer with the currently bound render targets |
| 131 | const VideoCommon::Shader::Sampler& entry) { | 139 | /// UpdateRenderTargets should be called before this |
| 132 | std::lock_guard lock{mutex}; | 140 | Framebuffer* GetFramebuffer(); |
| 133 | const auto gpu_addr{tic.Address()}; | ||
| 134 | if (!gpu_addr) { | ||
| 135 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 136 | } | ||
| 137 | 141 | ||
| 138 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 142 | /// Mark images in a range as modified from the CPU |
| 139 | if (!cpu_addr) { | 143 | void WriteMemory(VAddr cpu_addr, size_t size); |
| 140 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 141 | } | ||
| 142 | 144 | ||
| 143 | if (!IsTypeCompatible(tic.texture_type, entry)) { | 145 | /// Download contents of host images to guest memory in a region |
| 144 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 146 | void DownloadMemory(VAddr cpu_addr, size_t size); |
| 145 | } | ||
| 146 | 147 | ||
| 147 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; | 148 | /// Remove images in a region |
| 148 | const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); | 149 | void UnmapMemory(VAddr cpu_addr, size_t size); |
| 149 | if (guard_samplers) { | ||
| 150 | sampled_textures.push_back(surface); | ||
| 151 | } | ||
| 152 | return view; | ||
| 153 | } | ||
| 154 | 150 | ||
| 155 | TView GetImageSurface(const Tegra::Texture::TICEntry& tic, | 151 | /// Blit an image with the given parameters |
| 156 | const VideoCommon::Shader::Image& entry) { | 152 | void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 157 | std::lock_guard lock{mutex}; | 153 | const Tegra::Engines::Fermi2D::Surface& src, |
| 158 | const auto gpu_addr{tic.Address()}; | 154 | const Tegra::Engines::Fermi2D::Config& copy); |
| 159 | if (!gpu_addr) { | ||
| 160 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 161 | } | ||
| 162 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 163 | if (!cpu_addr) { | ||
| 164 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 165 | } | ||
| 166 | const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; | ||
| 167 | const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); | ||
| 168 | if (guard_samplers) { | ||
| 169 | sampled_textures.push_back(surface); | ||
| 170 | } | ||
| 171 | return view; | ||
| 172 | } | ||
| 173 | 155 | ||
| 174 | bool TextureBarrier() { | 156 | /// Invalidate the contents of the color buffer index |
| 175 | const bool any_rt = | 157 | /// These contents become unspecified, the cache can assume aggressive optimizations. |
| 176 | std::any_of(sampled_textures.begin(), sampled_textures.end(), | 158 | void InvalidateColorBuffer(size_t index); |
| 177 | [](const auto& surface) { return surface->IsRenderTarget(); }); | ||
| 178 | sampled_textures.clear(); | ||
| 179 | return any_rt; | ||
| 180 | } | ||
| 181 | 159 | ||
| 182 | TView GetDepthBufferSurface(bool preserve_contents) { | 160 | /// Invalidate the contents of the depth buffer |
| 183 | std::lock_guard lock{mutex}; | 161 | /// These contents become unspecified, the cache can assume aggressive optimizations. |
| 184 | auto& dirty = maxwell3d.dirty; | 162 | void InvalidateDepthBuffer(); |
| 185 | if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { | ||
| 186 | return depth_buffer.view; | ||
| 187 | } | ||
| 188 | dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; | ||
| 189 | 163 | ||
| 190 | const auto& regs{maxwell3d.regs}; | 164 | /// Try to find a cached image view in the given CPU address |
| 191 | const auto gpu_addr{regs.zeta.Address()}; | 165 | [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); |
| 192 | if (!gpu_addr || !regs.zeta_enable) { | ||
| 193 | SetEmptyDepthBuffer(); | ||
| 194 | return {}; | ||
| 195 | } | ||
| 196 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 197 | if (!cpu_addr) { | ||
| 198 | SetEmptyDepthBuffer(); | ||
| 199 | return {}; | ||
| 200 | } | ||
| 201 | const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)}; | ||
| 202 | auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); | ||
| 203 | if (depth_buffer.target) | ||
| 204 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); | ||
| 205 | depth_buffer.target = surface_view.first; | ||
| 206 | depth_buffer.view = surface_view.second; | ||
| 207 | if (depth_buffer.target) | ||
| 208 | depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); | ||
| 209 | return surface_view.second; | ||
| 210 | } | ||
| 211 | |||
| 212 | TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { | ||
| 213 | std::lock_guard lock{mutex}; | ||
| 214 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | ||
| 215 | if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) { | ||
| 216 | return render_targets[index].view; | ||
| 217 | } | ||
| 218 | maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false; | ||
| 219 | 166 | ||
| 220 | const auto& regs{maxwell3d.regs}; | 167 | /// Return true when there are uncommitted images to be downloaded |
| 221 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || | 168 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; |
| 222 | regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { | ||
| 223 | SetEmptyColorBuffer(index); | ||
| 224 | return {}; | ||
| 225 | } | ||
| 226 | 169 | ||
| 227 | const auto& config{regs.rt[index]}; | 170 | /// Return true when the caller should wait for async downloads |
| 228 | const auto gpu_addr{config.Address()}; | 171 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; |
| 229 | if (!gpu_addr) { | ||
| 230 | SetEmptyColorBuffer(index); | ||
| 231 | return {}; | ||
| 232 | } | ||
| 233 | 172 | ||
| 234 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 173 | /// Commit asynchronous downloads |
| 235 | if (!cpu_addr) { | 174 | void CommitAsyncFlushes(); |
| 236 | SetEmptyColorBuffer(index); | 175 | |
| 237 | return {}; | 176 | /// Pop asynchronous downloads |
| 238 | } | 177 | void PopAsyncFlushes(); |
| 178 | |||
| 179 | /// Return true when a CPU region is modified from the GPU | ||
| 180 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||
| 239 | 181 | ||
| 240 | auto surface_view = | 182 | private: |
| 241 | GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index), | 183 | /// Iterate over all page indices in a range |
| 242 | preserve_contents, true); | 184 | template <typename Func> |
| 243 | if (render_targets[index].target) { | 185 | static void ForEachPage(VAddr addr, size_t size, Func&& func) { |
| 244 | auto& surface = render_targets[index].target; | 186 | static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>; |
| 245 | surface->MarkAsRenderTarget(false, NO_RT); | 187 | const u64 page_end = (addr + size - 1) >> PAGE_SHIFT; |
| 246 | const auto& cr_params = surface->GetSurfaceParams(); | 188 | for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) { |
| 247 | if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) { | 189 | if constexpr (RETURNS_BOOL) { |
| 248 | AsyncFlushSurface(surface); | 190 | if (func(page)) { |
| 191 | break; | ||
| 192 | } | ||
| 193 | } else { | ||
| 194 | func(page); | ||
| 249 | } | 195 | } |
| 250 | } | 196 | } |
| 251 | render_targets[index].target = surface_view.first; | ||
| 252 | render_targets[index].view = surface_view.second; | ||
| 253 | if (render_targets[index].target) | ||
| 254 | render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index)); | ||
| 255 | return surface_view.second; | ||
| 256 | } | 197 | } |
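With PAGE_SHIFT = 20, every page covers 1 MiB of CPU address space, and the callback may optionally return bool to stop the walk early (the fix above to std::invoke_result_t is what makes that detection work; with the bare std::invoke_result trait the comparison against bool is always false). Two illustrative uses, where IsPageDirty is a hypothetical predicate and not part of the cache:

    // Visit every 1 MiB page overlapped by [addr, addr + size).
    ForEachPage(addr, size, [&](u64 page) { pages.push_back(page); });

    // Early-exit variant: a bool-returning callback stops at the first
    // page for which the hypothetical predicate holds.
    ForEachPage(addr, size, [&](u64 page) { return IsPageDirty(page); });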
| 257 | 198 | ||
| 258 | void MarkColorBufferInUse(std::size_t index) { | 199 | /// Fills image_view_ids with the image views at the given indices |
| 259 | if (auto& render_target = render_targets[index].target) { | 200 | void FillImageViews(DescriptorTable<TICEntry>& table, |
| 260 | render_target->MarkAsModified(true, Tick()); | 201 | std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, |
| 261 | } | 202 | std::span<ImageViewId> image_view_ids); |
| 262 | } | ||
| 263 | 203 | ||
| 264 | void MarkDepthBufferInUse() { | 204 | /// Find or create an image view in the guest descriptor table |
| 265 | if (depth_buffer.target) { | 205 | ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, |
| 266 | depth_buffer.target->MarkAsModified(true, Tick()); | 206 | std::span<ImageViewId> cached_image_view_ids, u32 index); |
| 267 | } | ||
| 268 | } | ||
| 269 | 207 | ||
| 270 | void SetEmptyDepthBuffer() { | 208 | /// Find or create a framebuffer with the given render target parameters |
| 271 | if (depth_buffer.target == nullptr) { | 209 | FramebufferId GetFramebufferId(const RenderTargets& key); |
| 272 | return; | ||
| 273 | } | ||
| 274 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); | ||
| 275 | depth_buffer.target = nullptr; | ||
| 276 | depth_buffer.view = nullptr; | ||
| 277 | } | ||
| 278 | 210 | ||
| 279 | void SetEmptyColorBuffer(std::size_t index) { | 211 | /// Refresh the contents (pixel data) of an image |
| 280 | if (render_targets[index].target == nullptr) { | 212 | void RefreshContents(Image& image); |
| 281 | return; | ||
| 282 | } | ||
| 283 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); | ||
| 284 | render_targets[index].target = nullptr; | ||
| 285 | render_targets[index].view = nullptr; | ||
| 286 | } | ||
| 287 | |||
| 288 | void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, | ||
| 289 | const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, | ||
| 290 | const Tegra::Engines::Fermi2D::Config& copy_config) { | ||
| 291 | std::lock_guard lock{mutex}; | ||
| 292 | SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config); | ||
| 293 | SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); | ||
| 294 | const GPUVAddr src_gpu_addr = src_config.Address(); | ||
| 295 | const GPUVAddr dst_gpu_addr = dst_config.Address(); | ||
| 296 | DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); | ||
| 297 | |||
| 298 | const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr); | ||
| 299 | const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr); | ||
| 300 | std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); | ||
| 301 | TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; | ||
| 302 | ImageBlit(src_surface, dst_surface.second, copy_config); | ||
| 303 | dst_surface.first->MarkAsModified(true, Tick()); | ||
| 304 | } | ||
| 305 | |||
| 306 | TSurface TryFindFramebufferSurface(VAddr addr) const { | ||
| 307 | if (!addr) { | ||
| 308 | return nullptr; | ||
| 309 | } | ||
| 310 | const VAddr page = addr >> registry_page_bits; | ||
| 311 | const auto it = registry.find(page); | ||
| 312 | if (it == registry.end()) { | ||
| 313 | return nullptr; | ||
| 314 | } | ||
| 315 | const auto& list = it->second; | ||
| 316 | const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { | ||
| 317 | return surface->GetCpuAddr() == addr; | ||
| 318 | }); | ||
| 319 | return found != list.end() ? *found : nullptr; | ||
| 320 | } | ||
| 321 | 213 | ||
| 322 | u64 Tick() { | 214 | /// Upload data from guest to an image |
| 323 | return ++ticks; | 215 | template <typename MapBuffer> |
| 324 | } | 216 | void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset); |
| 325 | 217 | ||
| 326 | void CommitAsyncFlushes() { | 218 | /// Find or create an image view from a guest descriptor |
| 327 | committed_flushes.push_back(uncommitted_flushes); | 219 | [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); |
| 328 | uncommitted_flushes.reset(); | ||
| 329 | } | ||
| 330 | 220 | ||
| 331 | bool HasUncommittedFlushes() const { | 221 | /// Create a new image view from a guest descriptor |
| 332 | return uncommitted_flushes != nullptr; | 222 | [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); |
| 333 | } | ||
| 334 | 223 | ||
| 335 | bool ShouldWaitAsyncFlushes() const { | 224 | /// Find or create an image from the given parameters |
| 336 | return !committed_flushes.empty() && committed_flushes.front() != nullptr; | 225 | [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 337 | } | 226 | RelaxedOptions options = RelaxedOptions{}); |
| 338 | 227 | ||
| 339 | void PopAsyncFlushes() { | 228 | /// Find an image from the given parameters |
| 340 | if (committed_flushes.empty()) { | 229 | [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 341 | return; | 230 | RelaxedOptions options); |
| 342 | } | ||
| 343 | auto& flush_list = committed_flushes.front(); | ||
| 344 | if (!flush_list) { | ||
| 345 | committed_flushes.pop_front(); | ||
| 346 | return; | ||
| 347 | } | ||
| 348 | for (TSurface& surface : *flush_list) { | ||
| 349 | FlushSurface(surface); | ||
| 350 | } | ||
| 351 | committed_flushes.pop_front(); | ||
| 352 | } | ||
| 353 | 231 | ||
| 354 | protected: | 232 | /// Create an image from the given parameters |
| 355 | explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_, | 233 | [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 356 | Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, | 234 | RelaxedOptions options); |
| 357 | bool is_astc_supported_) | ||
| 358 | : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, | ||
| 359 | gpu_memory{gpu_memory_} { | ||
| 360 | for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | ||
| 361 | SetEmptyColorBuffer(i); | ||
| 362 | } | ||
| 363 | 235 | ||
| 364 | SetEmptyDepthBuffer(); | 236 | /// Create a new image and join perfectly matching existing images |
| 365 | staging_cache.SetSize(2); | 237 | /// Remove joined images from the cache |
| 238 | [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | ||
| 366 | 239 | ||
| 367 | const auto make_siblings = [this](PixelFormat a, PixelFormat b) { | 240 | /// Return a blit image pair from the given guest blit parameters |
| 368 | siblings_table[static_cast<std::size_t>(a)] = b; | 241 | [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, |
| 369 | siblings_table[static_cast<std::size_t>(b)] = a; | 242 | const Tegra::Engines::Fermi2D::Surface& src); |
| 370 | }; | ||
| 371 | std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); | ||
| 372 | make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM); | ||
| 373 | make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT); | ||
| 374 | make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT); | ||
| 375 | 243 | ||
| 376 | sampled_textures.reserve(64); | 244 | /// Find or create a sampler from a guest descriptor sampler |
| 377 | } | 245 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); |
| 378 | 246 | ||
| 379 | ~TextureCache() = default; | 247 | /// Find or create an image view for the given color buffer index |
| 248 | [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); | ||
| 380 | 249 | ||
| 381 | virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; | 250 | /// Find or create an image view for the depth buffer |
| 251 | [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); | ||
| 382 | 252 | ||
| 383 | virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, | 253 | /// Find or create a view for a render target with the given image parameters |
| 384 | const CopyParams& copy_params) = 0; | 254 | [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, |
| 255 | bool is_clear); | ||
| 385 | 256 | ||
| 386 | virtual void ImageBlit(TView& src_view, TView& dst_view, | 257 | /// Iterates over all the images in a region calling func |
| 387 | const Tegra::Engines::Fermi2D::Config& copy_config) = 0; | 258 | template <typename Func> |
| 259 | void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); | ||
| 388 | 260 | ||
| 389 | // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture | 261 | /// Find or create an image view in the given image with the passed parameters |
| 390 | // and reading it from a separate buffer. | 262 | [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); |
| 391 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; | ||
| 392 | 263 | ||
| 393 | void ManageRenderTargetUnregister(TSurface& surface) { | 264 | /// Register image in the page table |
| 394 | auto& dirty = maxwell3d.dirty; | 265 | void RegisterImage(ImageId image); |
| 395 | const u32 index = surface->GetRenderTarget(); | 266 | |
| 396 | if (index == DEPTH_RT) { | 267 | /// Unregister image from the page table |
| 397 | dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; | 268 | void UnregisterImage(ImageId image); |
| 398 | } else { | 269 | |
| 399 | dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true; | 270 | /// Track CPU reads and writes for image |
| 400 | } | 271 | void TrackImage(ImageBase& image); |
| 401 | dirty.flags[VideoCommon::Dirty::RenderTargets] = true; | 272 | |
| 273 | /// Stop tracking CPU reads and writes for image | ||
| 274 | void UntrackImage(ImageBase& image); | ||
| 275 | |||
| 276 | /// Delete image from the cache | ||
| 277 | void DeleteImage(ImageId image); | ||
| 278 | |||
| 279 | /// Remove image views references from the cache | ||
| 280 | void RemoveImageViewReferences(std::span<const ImageViewId> removed_views); | ||
| 281 | |||
| 282 | /// Remove framebuffers using the given image views from the cache | ||
| 283 | void RemoveFramebuffers(std::span<const ImageViewId> removed_views); | ||
| 284 | |||
| 285 | /// Mark an image as modified from the GPU | ||
| 286 | void MarkModification(ImageBase& image) noexcept; | ||
| 287 | |||
| 288 | /// Synchronize image aliases, copying data if needed | ||
| 289 | void SynchronizeAliases(ImageId image_id); | ||
| 290 | |||
| 291 | /// Prepare an image to be used | ||
| 292 | void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); | ||
| 293 | |||
| 294 | /// Prepare an image view to be used | ||
| 295 | void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); | ||
| 296 | |||
| 297 | /// Execute copies from one image to the other, even if they are incompatible | ||
| 298 | void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies); | ||
| 299 | |||
| 300 | /// Bind an image view as render target, downloading resources preemptively if needed | ||
| 301 | void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); | ||
| 302 | |||
| 303 | /// Create a render target from a given image and image view parameters | ||
| 304 | [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage( | ||
| 305 | ImageId, const ImageViewInfo& view_info); | ||
| 306 | |||
| 307 | /// Returns true if the current clear parameters clear the whole image of a given image view | ||
| 308 | [[nodiscard]] bool IsFullClear(ImageViewId id); | ||
| 309 | |||
| 310 | Runtime& runtime; | ||
| 311 | VideoCore::RasterizerInterface& rasterizer; | ||
| 312 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 313 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 314 | Tegra::MemoryManager& gpu_memory; | ||
| 315 | |||
| 316 | DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; | ||
| 317 | DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; | ||
| 318 | std::vector<SamplerId> graphics_sampler_ids; | ||
| 319 | std::vector<ImageViewId> graphics_image_view_ids; | ||
| 320 | |||
| 321 | DescriptorTable<TICEntry> compute_image_table{gpu_memory}; | ||
| 322 | DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory}; | ||
| 323 | std::vector<SamplerId> compute_sampler_ids; | ||
| 324 | std::vector<ImageViewId> compute_image_view_ids; | ||
| 325 | |||
| 326 | RenderTargets render_targets; | ||
| 327 | |||
| 328 | std::mutex mutex; | ||
| 329 | |||
| 330 | std::unordered_map<TICEntry, ImageViewId> image_views; | ||
| 331 | std::unordered_map<TSCEntry, SamplerId> samplers; | ||
| 332 | std::unordered_map<RenderTargets, FramebufferId> framebuffers; | ||
| 333 | |||
| 334 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; | ||
| 335 | |||
| 336 | bool has_deleted_images = false; | ||
| 337 | |||
| 338 | SlotVector<Image> slot_images; | ||
| 339 | SlotVector<ImageView> slot_image_views; | ||
| 340 | SlotVector<ImageAlloc> slot_image_allocs; | ||
| 341 | SlotVector<Sampler> slot_samplers; | ||
| 342 | SlotVector<Framebuffer> slot_framebuffers; | ||
| 343 | |||
| 344 | // TODO: This data structure is not optimal and it should be reworked | ||
| 345 | std::vector<ImageId> uncommitted_downloads; | ||
| 346 | std::queue<std::vector<ImageId>> committed_downloads; | ||
| 347 | |||
| 348 | static constexpr size_t TICKS_TO_DESTROY = 6; | ||
| 349 | DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; | ||
| 350 | DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; | ||
| 351 | DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers; | ||
| 352 | |||
| 353 | std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; | ||
| 354 | |||
| 355 | u64 modification_tick = 0; | ||
| 356 | u64 frame_tick = 0; | ||
| 357 | }; | ||
| 358 | |||
| 359 | template <class P> | ||
| 360 | TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, | ||
| 361 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 362 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 363 | Tegra::MemoryManager& gpu_memory_) | ||
| 364 | : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, | ||
| 365 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { | ||
| 366 | // Configure null sampler | ||
| 367 | TSCEntry sampler_descriptor{}; | ||
| 368 | sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); | ||
| 369 | sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); | ||
| 370 | sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); | ||
| 371 | sampler_descriptor.cubemap_anisotropy.Assign(1); | ||
| 372 | |||
| 373 | // Make sure the first index is reserved for the null resources | ||
| 374 | // This way the null resource becomes a compile-time constant | ||
| 375 | void(slot_image_views.insert(runtime, NullImageParams{})); | ||
| 376 | void(slot_samplers.insert(runtime, sampler_descriptor)); | ||
| 377 | } | ||
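Because SlotVector hands out indices in insertion order, inserting the null image view and the null sampler before anything else pins them to index 0, matching the NULL_IMAGE_VIEW_ID{0} and NULL_SAMPLER_ID{0} constants declared earlier. An illustrative restatement of that invariant (assert form, not actual cache code):

    // Sketch: the first insertion into each pool must yield index 0,
    // which is what makes the null ids compile-time constants.
    const ImageViewId null_view = slot_image_views.insert(runtime, NullImageParams{});
    ASSERT(null_view == NULL_IMAGE_VIEW_ID);
    const SamplerId null_sampler = slot_samplers.insert(runtime, sampler_descriptor);
    ASSERT(null_sampler == NULL_SAMPLER_ID);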
| 378 | |||
| 379 | template <class P> | ||
| 380 | void TextureCache<P>::TickFrame() { | ||
| 381 | // Tick sentenced resources in this order to ensure they are destroyed in the right order | ||
| 382 | sentenced_images.Tick(); | ||
| 383 | sentenced_framebuffers.Tick(); | ||
| 384 | sentenced_image_view.Tick(); | ||
| 385 | ++frame_tick; | ||
| 386 | } | ||
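Each DelayedDestructionRing keeps objects alive for TICKS_TO_DESTROY frames so in-flight GPU work that still references them can finish before they are freed. A minimal sketch of a ring with that behavior (an assumption about its shape, not the actual implementation in delayed_destruction_ring.h):

    #include <array>
    #include <utility>
    #include <vector>
    template <typename T, size_t TICKS>
    class DelayedDestructionRingSketch {
    public:
        // Advancing the ring destroys everything queued TICKS frames ago.
        void Tick() {
            index = (index + 1) % TICKS;
            chambers[index].clear();
        }
        void Push(T&& object) {
            chambers[index].push_back(std::move(object));
        }
    private:
        size_t index = 0;
        std::array<std::vector<T>, TICKS> chambers;
    };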
| 387 | |||
| 388 | template <class P> | ||
| 389 | std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() { | ||
| 390 | return std::unique_lock{mutex}; | ||
| 391 | } | ||
| 392 | |||
| 393 | template <class P> | ||
| 394 | const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { | ||
| 395 | return slot_image_views[id]; | ||
| 396 | } | ||
| 397 | |||
| 398 | template <class P> | ||
| 399 | typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { | ||
| 400 | return slot_image_views[id]; | ||
| 401 | } | ||
| 402 | |||
| 403 | template <class P> | ||
| 404 | void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, | ||
| 405 | std::span<ImageViewId> image_view_ids) { | ||
| 406 | FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); | ||
| 407 | } | ||
| 408 | |||
| 409 | template <class P> | ||
| 410 | void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, | ||
| 411 | std::span<ImageViewId> image_view_ids) { | ||
| 412 | FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); | ||
| 413 | } | ||
| 414 | |||
| 415 | template <class P> | ||
| 416 | typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { | ||
| 417 | [[unlikely]] if (index > graphics_sampler_table.Limit()) { | ||
| 418 | LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); | ||
| 419 | return &slot_samplers[NULL_SAMPLER_ID]; | ||
| 420 | } | ||
| 421 | const auto [descriptor, is_new] = graphics_sampler_table.Read(index); | ||
| 422 | SamplerId& id = graphics_sampler_ids[index]; | ||
| 423 | [[unlikely]] if (is_new) { | ||
| 424 | id = FindSampler(descriptor); | ||
| 402 | } | 425 | } |
| 426 | return &slot_samplers[id]; | ||
| 427 | } | ||
| 403 | 428 | ||
| 404 | void Register(TSurface surface) { | 429 | template <class P> |
| 405 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | 430 | typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { |
| 406 | const std::size_t size = surface->GetSizeInBytes(); | 431 | [[unlikely]] if (index > compute_sampler_table.Limit()) { |
| 407 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 432 | LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); |
| 408 | if (!cpu_addr) { | 433 | return &slot_samplers[NULL_SAMPLER_ID]; |
| 409 | LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", | 434 | } |
| 410 | gpu_addr); | 435 | const auto [descriptor, is_new] = compute_sampler_table.Read(index); |
| 411 | return; | 436 | SamplerId& id = compute_sampler_ids[index]; |
| 412 | } | 437 | [[unlikely]] if (is_new) { |
| 413 | surface->SetCpuAddr(*cpu_addr); | 438 | id = FindSampler(descriptor); |
| 414 | RegisterInnerCache(surface); | ||
| 415 | surface->MarkAsRegistered(true); | ||
| 416 | surface->SetMemoryMarked(true); | ||
| 417 | rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); | ||
| 418 | } | 439 | } |
| 440 | return &slot_samplers[id]; | ||
| 441 | } | ||
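Both sampler getters share one memoization pattern: bounds-check the index against the descriptor table limit, Read() the raw TSC descriptor (which reports whether the bytes changed since the last read), and only pay for FindSampler() when something changed. Condensed into a single hypothetical helper (GetSamplerShared is not a real member, just the two getters folded together):

    Sampler* GetSamplerShared(DescriptorTable<TSCEntry>& table,
                              std::vector<SamplerId>& cached_ids, u32 index) {
        if (index > table.Limit()) {
            return &slot_samplers[NULL_SAMPLER_ID]; // out of range -> null sampler
        }
        const auto [descriptor, is_new] = table.Read(index);
        SamplerId& id = cached_ids[index];
        if (is_new) {
            id = FindSampler(descriptor); // only re-resolve when the TSC bytes changed
        }
        return &slot_samplers[id];
    }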
| 419 | 442 | ||
| 420 | void UnmarkMemory(TSurface surface) { | 443 | template <class P> |
| 421 | if (!surface->IsMemoryMarked()) { | 444 | void TextureCache<P>::SynchronizeGraphicsDescriptors() { |
| 422 | return; | 445 | using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; |
| 423 | } | 446 | const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; |
| 424 | const std::size_t size = surface->GetSizeInBytes(); | 447 | const u32 tic_limit = maxwell3d.regs.tic.limit; |
| 425 | const VAddr cpu_addr = surface->GetCpuAddr(); | 448 | const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; |
| 426 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | 449 | if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { |
| 427 | surface->SetMemoryMarked(false); | 450 | graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); |
| 428 | } | 451 | } |
| 452 | if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { | ||
| 453 | graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | ||
| 454 | } | ||
| 455 | } | ||
| 429 | 456 | ||
| 430 | void Unregister(TSurface surface) { | 457 | template <class P> |
| 431 | if (guard_render_targets && surface->IsProtected()) { | 458 | void TextureCache<P>::SynchronizeComputeDescriptors() { |
| 432 | return; | 459 | const bool linked_tsc = kepler_compute.launch_description.linked_tsc; |
| 433 | } | 460 | const u32 tic_limit = kepler_compute.regs.tic.limit; |
| 434 | if (!guard_render_targets && surface->IsRenderTarget()) { | 461 | const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; |
| 435 | ManageRenderTargetUnregister(surface); | 462 | const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); |
| 436 | } | 463 | if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { |
| 437 | UnmarkMemory(surface); | 464 | compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); |
| 438 | if (surface->IsSyncPending()) { | ||
| 439 | marked_for_unregister.remove(surface); | ||
| 440 | surface->SetSyncPending(false); | ||
| 441 | } | ||
| 442 | UnregisterInnerCache(surface); | ||
| 443 | surface->MarkAsRegistered(false); | ||
| 444 | ReserveSurface(surface->GetSurfaceParams(), surface); | ||
| 445 | } | 465 | } |
| 466 | if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { | ||
| 467 | compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | ||
| 468 | } | ||
| 469 | } | ||
| 446 | 470 | ||
| 447 | TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { | 471 | template <class P> |
| 448 | if (const auto surface = TryGetReservedSurface(params); surface) { | 472 | void TextureCache<P>::UpdateRenderTargets(bool is_clear) { |
| 449 | surface->SetGpuAddr(gpu_addr); | 473 | using namespace VideoCommon::Dirty; |
| 450 | return surface; | 474 | auto& flags = maxwell3d.dirty.flags; |
| 451 | } | 475 | if (!flags[Dirty::RenderTargets]) { |
| 452 | // No reserved surface available, create a new one and reserve it | 476 | return; |
| 453 | auto new_surface{CreateSurface(gpu_addr, params)}; | ||
| 454 | return new_surface; | ||
| 455 | } | 477 | } |
| 478 | flags[Dirty::RenderTargets] = false; | ||
| 456 | 479 | ||
| 457 | const bool is_astc_supported; | 480 | // Render target control affects all render targets, so force lookups when it is dirty |
| 481 | const bool force = flags[Dirty::RenderTargetControl]; | ||
| 482 | flags[Dirty::RenderTargetControl] = false; | ||
| 458 | 483 | ||
| 459 | private: | 484 | for (size_t index = 0; index < NUM_RT; ++index) { |
| 460 | enum class RecycleStrategy : u32 { | 485 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; |
| 461 | Ignore = 0, | 486 | if (flags[Dirty::ColorBuffer0 + index] || force) { |
| 462 | Flush = 1, | 487 | flags[Dirty::ColorBuffer0 + index] = false; |
| 463 | BufferCopy = 3, | 488 | BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); |
| 464 | }; | 489 | } |
| 490 | PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); | ||
| 491 | } | ||
| 492 | if (flags[Dirty::ZetaBuffer] || force) { | ||
| 493 | flags[Dirty::ZetaBuffer] = false; | ||
| 494 | BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); | ||
| 495 | } | ||
| 496 | const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; | ||
| 497 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); | ||
| 465 | 498 | ||
| 466 | enum class DeductionType : u32 { | 499 | for (size_t index = 0; index < NUM_RT; ++index) { |
| 467 | DeductionComplete, | 500 | render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); |
| 468 | DeductionIncomplete, | 501 | } |
| 469 | DeductionFailed, | 502 | render_targets.size = Extent2D{ |
| 503 | maxwell3d.regs.render_area.width, | ||
| 504 | maxwell3d.regs.render_area.height, | ||
| 470 | }; | 505 | }; |
| 506 | } | ||
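The expected call pattern pairs UpdateRenderTargets with GetFramebuffer below: the former refreshes render_targets from the dirty bits, the latter resolves that key to a cached framebuffer. An illustrative caller-side sketch (the surrounding rasterizer code is assumed, not shown in this diff):

    auto lock = texture_cache.AcquireLock();
    texture_cache.UpdateRenderTargets(false); // regular draw, not a clear
    Framebuffer* const framebuffer = texture_cache.GetFramebuffer();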
| 471 | 507 | ||
| 472 | struct Deduction { | 508 | template <class P> |
| 473 | DeductionType type{DeductionType::DeductionFailed}; | 509 | typename P::Framebuffer* TextureCache<P>::GetFramebuffer() { |
| 474 | TSurface surface{}; | 510 | return &slot_framebuffers[GetFramebufferId(render_targets)]; |
| 511 | } | ||
| 475 | 512 | ||
| 476 | bool Failed() const { | 513 | template <class P> |
| 477 | return type == DeductionType::DeductionFailed; | 514 | void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, |
| 478 | } | 515 | std::span<ImageViewId> cached_image_view_ids, |
| 516 | std::span<const u32> indices, | ||
| 517 | std::span<ImageViewId> image_view_ids) { | ||
| 518 | ASSERT(indices.size() <= image_view_ids.size()); | ||
| 519 | do { | ||
| 520 | has_deleted_images = false; | ||
| 521 | std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { | ||
| 522 | return VisitImageView(table, cached_image_view_ids, index); | ||
| 523 | }); | ||
| 524 | } while (has_deleted_images); | ||
| 525 | } | ||
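The do/while exists because VisitImageView can delete images, and a deletion may invalidate ImageViewIds already written earlier in the same pass; rerunning the transform until has_deleted_images stays false guarantees every returned id is still live. The idiom in isolation (Visit, indices, and out are hypothetical stand-ins):

    // Retry-until-stable sketch: redo the whole pass whenever a visit
    // invalidated results produced earlier in the same pass.
    bool again;
    do {
        again = false;
        std::ranges::transform(indices, out.begin(), [&](u32 index) {
            const auto [id, deleted] = Visit(index); // hypothetical helper
            again |= deleted;
            return id;
        });
    } while (again);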
| 479 | 526 | ||
| 480 | bool Incomplete() const { | 527 | template <class P> |
| 481 | return type == DeductionType::DeductionIncomplete; | 528 | ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, |
| 482 | } | 529 | std::span<ImageViewId> cached_image_view_ids, |
| 530 | u32 index) { | ||
| 531 | if (index > table.Limit()) { | ||
| 532 | LOG_ERROR(HW_GPU, "Invalid image view index={}", index); | ||
| 533 | return NULL_IMAGE_VIEW_ID; | ||
| 534 | } | ||
| 535 | const auto [descriptor, is_new] = table.Read(index); | ||
| 536 | ImageViewId& image_view_id = cached_image_view_ids[index]; | ||
| 537 | if (is_new) { | ||
| 538 | image_view_id = FindImageView(descriptor); | ||
| 539 | } | ||
| 540 | if (image_view_id != NULL_IMAGE_VIEW_ID) { | ||
| 541 | PrepareImageView(image_view_id, false, false); | ||
| 542 | } | ||
| 543 | return image_view_id; | ||
| 544 | } | ||
| 483 | 545 | ||
| 484 | bool IsDepth() const { | 546 | template <class P> |
| 485 | return surface->GetSurfaceParams().IsPixelFormatZeta(); | 547 | FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) { |
| 486 | } | 548 | const auto [pair, is_new] = framebuffers.try_emplace(key); |
| 487 | }; | 549 | FramebufferId& framebuffer_id = pair->second; |
| 550 | if (!is_new) { | ||
| 551 | return framebuffer_id; | ||
| 552 | } | ||
| 553 | std::array<ImageView*, NUM_RT> color_buffers; | ||
| 554 | std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), | ||
| 555 | [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); | ||
| 556 | ImageView* const depth_buffer = | ||
| 557 | key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; | ||
| 558 | framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); | ||
| 559 | return framebuffer_id; | ||
| 560 | } | ||
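try_emplace doubles as lookup and insertion: on a hit it returns the cached FramebufferId after a single hash lookup, and on a miss it default-constructs the slot, which the function then fills in place. The idiom reduced to its core (ExpensiveCreate is a hypothetical factory):

    auto [it, inserted] = cache.try_emplace(key);
    if (inserted) {
        it->second = ExpensiveCreate(key); // only paid on a miss
    }
    return it->second;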
| 488 | 561 | ||
| 489 | /** | 562 | template <class P> |
| 490 | * Takes care of selecting a proper strategy to deal with a texture recycle. | 563 | void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { |
| 491 | * | 564 | ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { |
| 492 | * @param overlaps The overlapping surfaces registered in the cache. | 565 | if (True(image.flags & ImageFlagBits::CpuModified)) { |
| 493 | * @param params The parameters on the new surface. | 566 | return; |
| 494 | * @param gpu_addr The starting address of the new surface. | ||
| 495 | * @param untopological Indicates to the recycler that the texture has no way | ||
| 496 | * to match the overlaps due to topological reasons. | ||
| 497 | **/ | ||
| 498 | RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, | ||
| 499 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { | ||
| 500 | if (Settings::IsGPULevelExtreme()) { | ||
| 501 | return RecycleStrategy::Flush; | ||
| 502 | } | ||
| 503 | // 3D Textures decision | ||
| 504 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 505 | return RecycleStrategy::Flush; | ||
| 506 | } | ||
| 507 | for (const auto& s : overlaps) { | ||
| 508 | const auto& s_params = s->GetSurfaceParams(); | ||
| 509 | if (s_params.target == SurfaceTarget::Texture3D) { | ||
| 510 | return RecycleStrategy::Flush; | ||
| 511 | } | ||
| 512 | } | ||
| 513 | // Untopological decision | ||
| 514 | if (untopological == MatchTopologyResult::CompressUnmatch) { | ||
| 515 | return RecycleStrategy::Flush; | ||
| 516 | } | ||
| 517 | if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { | ||
| 518 | return RecycleStrategy::Flush; | ||
| 519 | } | ||
| 520 | return RecycleStrategy::Ignore; | ||
| 521 | } | ||
| 522 | |||
| 523 | /** | ||
| 524 | * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented | ||
| 525 | * strategies: Ignore and Flush. | ||
| 526 | * | ||
| 527 | * - Ignore: Just unregisters all the overlaps and loads the new texture. | ||
| 528 | * - Flush: Flushes all the overlaps into memory and loads the new surface from that data. | ||
| 529 | * | ||
| 530 | * @param overlaps The overlapping surfaces registered in the cache. | ||
| 531 | * @param params The parameters for the new surface. | ||
| 532 | * @param gpu_addr The starting address of the new surface. | ||
| 533 | * @param preserve_contents Indicates that the new surface should be loaded from memory or left | ||
| 534 | * blank. | ||
| 535 | * @param untopological Indicates to the recycler that the texture has no way to match the | ||
| 536 | * overlaps due to topological reasons. | ||
| 537 | **/ | ||
| 538 | std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, | ||
| 539 | const GPUVAddr gpu_addr, const bool preserve_contents, | ||
| 540 | const MatchTopologyResult untopological) { | ||
| 541 | const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); | ||
| 542 | for (auto& surface : overlaps) { | ||
| 543 | Unregister(surface); | ||
| 544 | } | ||
| 545 | switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { | ||
| 546 | case RecycleStrategy::Ignore: { | ||
| 547 | return InitializeSurface(gpu_addr, params, do_load); | ||
| 548 | } | ||
| 549 | case RecycleStrategy::Flush: { | ||
| 550 | std::sort(overlaps.begin(), overlaps.end(), | ||
| 551 | [](const TSurface& a, const TSurface& b) -> bool { | ||
| 552 | return a->GetModificationTick() < b->GetModificationTick(); | ||
| 553 | }); | ||
| 554 | for (auto& surface : overlaps) { | ||
| 555 | FlushSurface(surface); | ||
| 556 | } | ||
| 557 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 558 | } | 567 | } |
| 559 | case RecycleStrategy::BufferCopy: { | 568 | image.flags |= ImageFlagBits::CpuModified; |
| 560 | auto new_surface = GetUncachedSurface(gpu_addr, params); | 569 | UntrackImage(image); |
| 561 | BufferCopy(overlaps[0], new_surface); | 570 | }); |
| 562 | return {new_surface, new_surface->GetMainView()}; | 571 | } |
| 572 | |||
| 573 | template <class P> | ||
| 574 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | ||
| 575 | std::vector<ImageId> images; | ||
| 576 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { | ||
| 577 | // Skip images that were not modified from the GPU | ||
| 578 | if (False(image.flags & ImageFlagBits::GpuModified)) { | ||
| 579 | return; | ||
| 563 | } | 580 | } |
| 564 | default: { | 581 | // Skip images that are modified from the CPU; |
| 565 | UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); | 582 | // we don't want to overwrite sensitive data written by the guest |
| 566 | return InitializeSurface(gpu_addr, params, do_load); | 583 | if (True(image.flags & ImageFlagBits::CpuModified)) { |
| 584 | return; | ||
| 567 | } | 585 | } |
| 586 | if (image.info.num_samples > 1) { | ||
| 587 | LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); | ||
| 588 | return; | ||
| 568 | } | 589 | } |
| 590 | image.flags &= ~ImageFlagBits::GpuModified; | ||
| 591 | images.push_back(image_id); | ||
| 592 | }); | ||
| 593 | if (images.empty()) { | ||
| 594 | return; | ||
| 595 | } | ||
| 596 | std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { | ||
| 597 | return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; | ||
| 598 | }); | ||
| 599 | for (const ImageId image_id : images) { | ||
| 600 | Image& image = slot_images[image_id]; | ||
| 601 | auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes); | ||
| 602 | const auto copies = FullDownloadCopies(image.info); | ||
| 603 | image.DownloadMemory(map, 0, copies); | ||
| 604 | runtime.Finish(); | ||
| 605 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span()); | ||
| 569 | } | 606 | } |
| 607 | } | ||
| 570 | 608 | ||
| 571 | /** | 609 | template <class P> |
| 572 | * Takes a single surface and recreates it into another that may differ in | 610 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { |
| 573 | * format, target or width alignment. | 611 | std::vector<ImageId> deleted_images; |
| 574 | * | 612 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 575 | * @param current_surface The registered surface in the cache which we want to convert. | 613 | for (const ImageId id : deleted_images) { |
| 576 | * @param params The new surface params which we'll use to recreate the surface. | 614 | Image& image = slot_images[id]; |
| 577 | * @param is_render Whether or not the surface is a render target. | 615 | if (True(image.flags & ImageFlagBits::Tracked)) { |
| 578 | **/ | 616 | UntrackImage(image); |
| 579 | std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params, | 617 | } |
| 580 | bool is_render) { | 618 | UnregisterImage(id); |
| 581 | const auto gpu_addr = current_surface->GetGpuAddr(); | 619 | DeleteImage(id); |
| 582 | const auto& cr_params = current_surface->GetSurfaceParams(); | 620 | } |
| 583 | TSurface new_surface; | 621 | } |
| 584 | if (cr_params.pixel_format != params.pixel_format && !is_render && | ||
| 585 | GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) { | ||
| 586 | SurfaceParams new_params = params; | ||
| 587 | new_params.pixel_format = cr_params.pixel_format; | ||
| 588 | new_params.type = cr_params.type; | ||
| 589 | new_surface = GetUncachedSurface(gpu_addr, new_params); | ||
| 590 | } else { | ||
| 591 | new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 592 | } | ||
| 593 | const SurfaceParams& final_params = new_surface->GetSurfaceParams(); | ||
| 594 | if (cr_params.type != final_params.type) { | ||
| 595 | if (Settings::IsGPULevelExtreme()) { | ||
| 596 | BufferCopy(current_surface, new_surface); | ||
| 597 | } | ||
| 598 | } else { | ||
| 599 | std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); | ||
| 600 | for (auto& brick : bricks) { | ||
| 601 | TryCopyImage(current_surface, new_surface, brick); | ||
| 602 | } | ||
| 603 | } | ||
| 604 | Unregister(current_surface); | ||
| 605 | Register(new_surface); | ||
| 606 | new_surface->MarkAsModified(current_surface->IsModified(), Tick()); | ||
| 607 | return {new_surface, new_surface->GetMainView()}; | ||
| 608 | } | ||
| 609 | |||
| 610 | /** | ||
| 611 | * Takes a single surface and checks it against the new surface's params. If it's an exact | ||
| 612 | * match, we return the main view of the registered surface. If the formats don't | ||
| 613 | * match, we rebuild the surface; we call this last method a `Mirage`. If the formats | ||
| 614 | * match but the targets don't, we create an overview View of the registered surface. | ||
| 615 | * | ||
| 616 | * @param current_surface The registered surface in the cache which we want to convert. | ||
| 617 | * @param params The new surface params which we want to check. | ||
| 618 | * @param is_render Whether or not the surface is a render target. | ||
| 619 | **/ | ||
| 620 | std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, | ||
| 621 | const SurfaceParams& params, bool is_render) { | ||
| 622 | const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); | ||
| 623 | const bool matches_target = current_surface->MatchTarget(params.target); | ||
| 624 | const auto match_check = [&]() -> std::pair<TSurface, TView> { | ||
| 625 | if (matches_target) { | ||
| 626 | return {current_surface, current_surface->GetMainView()}; | ||
| 627 | } | ||
| 628 | return {current_surface, current_surface->EmplaceOverview(params)}; | ||
| 629 | }; | ||
| 630 | if (!is_mirage) { | ||
| 631 | return match_check(); | ||
| 632 | } | ||
| 633 | if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) { | ||
| 634 | return match_check(); | ||
| 635 | } | ||
| 636 | return RebuildSurface(current_surface, params, is_render); | ||
| 637 | } | ||
| 638 | |||
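The branching in ManageStructuralMatch reduces to a small decision table. A self-contained sketch; MatchAction and Decide are illustrative names, not part of the cache:

```cpp
// Schematic of ManageStructuralMatch: same format and target -> reuse the
// main view; same format, different target -> emplace an overview view;
// different format ("mirage") -> rebuild, unless a sibling format makes the
// mismatch harmless for non-render usage.
enum class MatchAction { MainView, OverviewView, Rebuild };

constexpr MatchAction Decide(bool formats_match, bool targets_match, bool sibling_ok,
                             bool is_render) {
    const bool is_mirage = !formats_match;
    if (!is_mirage || (!is_render && sibling_ok)) {
        return targets_match ? MatchAction::MainView : MatchAction::OverviewView;
    }
    return MatchAction::Rebuild;
}

static_assert(Decide(true, true, false, false) == MatchAction::MainView);
static_assert(Decide(false, true, true, false) == MatchAction::MainView);
static_assert(Decide(false, true, false, true) == MatchAction::Rebuild);
```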
| 639 | /** | ||
| 640 | * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate | ||
| 641 | * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps | ||
| 642 | * of the new surface; if they all match, we end up recreating a surface for them; | ||
| 643 | * otherwise, we return nothing. | ||
| 644 | * | ||
| 645 | * @param overlaps The overlapping surfaces registered in the cache. | ||
| 646 | * @param params The parameters on the new surface. | ||
| 647 | * @param gpu_addr The starting address of the new surface. | ||
| 648 | **/ | ||
| 649 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps, | ||
| 650 | const SurfaceParams& params, | ||
| 651 | GPUVAddr gpu_addr) { | ||
| 652 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 653 | return std::nullopt; | ||
| 654 | } | ||
| 655 | const auto test_modified = [](TSurface& surface) { return surface->IsModified(); }; | ||
| 656 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 657 | 622 | ||
| 658 | if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) { | 623 | template <class P> |
| 659 | LoadSurface(new_surface); | 624 | void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 660 | for (const auto& surface : overlaps) { | 625 | const Tegra::Engines::Fermi2D::Surface& src, |
| 661 | Unregister(surface); | 626 | const Tegra::Engines::Fermi2D::Config& copy) { |
| 662 | } | 627 | const BlitImages images = GetBlitImages(dst, src); |
| 663 | Register(new_surface); | 628 | const ImageId dst_id = images.dst_id; |
| 664 | return {{new_surface, new_surface->GetMainView()}}; | 629 | const ImageId src_id = images.src_id; |
| 665 | } | 630 | PrepareImage(src_id, false, false); |
| 631 | PrepareImage(dst_id, true, false); | ||
| 632 | |||
| 633 | ImageBase& dst_image = slot_images[dst_id]; | ||
| 634 | const ImageBase& src_image = slot_images[src_id]; | ||
| 635 | |||
| 636 | // TODO: Deduplicate | ||
| 637 | const std::optional dst_base = dst_image.TryFindBase(dst.Address()); | ||
| 638 | const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; | ||
| 639 | const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); | ||
| 640 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | ||
| 641 | const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); | ||
| 642 | const std::array src_region{ | ||
| 643 | Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, | ||
| 644 | Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, | ||
| 645 | }; | ||
| 666 | 646 | ||
| 667 | std::size_t passed_tests = 0; | 647 | const std::optional src_base = src_image.TryFindBase(src.Address()); |
| 668 | for (auto& surface : overlaps) { | 648 | const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; |
| 669 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | 649 | const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); |
| 670 | const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; | 650 | const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); |
| 671 | if (!mipmap_layer) { | 651 | const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); |
| 672 | continue; | 652 | const std::array dst_region{ |
| 673 | } | 653 | Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, |
| 674 | const auto [base_layer, base_mipmap] = *mipmap_layer; | 654 | Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, |
| 675 | if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { | 655 | }; |
| 676 | continue; | ||
| 677 | } | ||
| 678 | ++passed_tests; | ||
| 679 | |||
| 680 | // Copy all mipmaps and layers | ||
| 681 | const u32 block_width = params.GetDefaultBlockWidth(); | ||
| 682 | const u32 block_height = params.GetDefaultBlockHeight(); | ||
| 683 | for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) { | ||
| 684 | const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); | ||
| 685 | const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); | ||
| 686 | if (width < block_width || height < block_height) { | ||
| 687 | // Current APIs forbid copying small compressed textures, so avoid errors | ||
| 688 | break; | ||
| 689 | } | ||
| 690 | const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, | ||
| 691 | src_params.depth); | ||
| 692 | TryCopyImage(surface, new_surface, copy_params); | ||
| 693 | } | ||
| 694 | } | ||
| 695 | if (passed_tests == 0) { | ||
| 696 | return std::nullopt; | ||
| 697 | } | ||
| 698 | if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { | ||
| 699 | // In Accurate GPU mode, all tests should pass; else we recycle | ||
| 700 | return std::nullopt; | ||
| 701 | } | ||
| 702 | 656 | ||
| 703 | const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified); | 657 | // Always call this after src_framebuffer_id was queried, as the address might be invalidated. |
| 704 | for (const auto& surface : overlaps) { | 658 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; |
| 705 | Unregister(surface); | 659 | if constexpr (FRAMEBUFFER_BLITS) { |
| 706 | } | 660 | // OpenGL blits from framebuffers, not images |
| 661 | Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; | ||
| 662 | runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, | ||
| 663 | copy.filter, copy.operation); | ||
| 664 | } else { | ||
| 665 | // Vulkan can blit images, but it lacks format reinterpretations | ||
| 666 | // Provide a framebuffer in case it's necessary | ||
| 667 | ImageView& dst_view = slot_image_views[dst_view_id]; | ||
| 668 | ImageView& src_view = slot_image_views[src_view_id]; | ||
| 669 | runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, | ||
| 670 | copy.operation); | ||
| 671 | } | ||
| 672 | } | ||
| 707 | 673 | ||
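The `if constexpr (FRAMEBUFFER_BLITS)` split in BlitImage above is resolved at compile time from the backend policy `P`; the untaken branch is discarded entirely rather than merely skipped. A minimal sketch of the pattern with made-up policy types (only the boolean constant matters; the real traits live in the OpenGL and Vulkan runtimes):

```cpp
#include <iostream>

// Made-up policies standing in for the backend runtime traits.
struct GLLikePolicy {
    static constexpr bool FRAMEBUFFER_BLITS = true;
};
struct VkLikePolicy {
    static constexpr bool FRAMEBUFFER_BLITS = false;
};

template <class P>
void Blit() {
    if constexpr (P::FRAMEBUFFER_BLITS) {
        // OpenGL-style: blit from framebuffer to framebuffer.
        std::cout << "framebuffer blit\n";
    } else {
        // Vulkan-style: blit images directly, keep a framebuffer as fallback.
        std::cout << "image blit\n";
    }
}

int main() {
    Blit<GLLikePolicy>();
    Blit<VkLikePolicy>();
}
```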
| 708 | new_surface->MarkAsModified(modified, Tick()); | 674 | template <class P> |
| 709 | Register(new_surface); | 675 | void TextureCache<P>::InvalidateColorBuffer(size_t index) { |
| 710 | return {{new_surface, new_surface->GetMainView()}}; | 676 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; |
| 711 | } | 677 | color_buffer_id = FindColorBuffer(index, false); |
| 712 | 678 | if (!color_buffer_id) { | |
| 713 | /** | 679 | LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index); |
| 714 | * Takes care of managing 3D textures and its slices. Does HLE methods for reconstructing the 3D | 680 | return; |
| 715 | * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of | 681 | } |
| 716 | * the HLE methods. | 682 | // When invalidating a color buffer, the old contents are no longer relevant |
| 717 | * | 683 | ImageView& color_buffer = slot_image_views[color_buffer_id]; |
| 718 | * @param overlaps The overlapping surfaces registered in the cache. | 684 | Image& image = slot_images[color_buffer.image_id]; |
| 719 | * @param params The parameters on the new surface. | 685 | image.flags &= ~ImageFlagBits::CpuModified; |
| 720 | * @param gpu_addr The starting address of the new surface. | 686 | image.flags &= ~ImageFlagBits::GpuModified; |
| 721 | * @param cpu_addr The starting address of the new surface on physical memory. | ||
| 722 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | ||
| 723 | * left blank. | ||
| 724 | */ | ||
| 725 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, | ||
| 726 | const SurfaceParams& params, | ||
| 727 | GPUVAddr gpu_addr, VAddr cpu_addr, | ||
| 728 | bool preserve_contents) { | ||
| 729 | if (params.target != SurfaceTarget::Texture3D) { | ||
| 730 | for (const auto& surface : overlaps) { | ||
| 731 | if (!surface->MatchTarget(params.target)) { | ||
| 732 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { | ||
| 733 | if (Settings::IsGPULevelExtreme()) { | ||
| 734 | return std::nullopt; | ||
| 735 | } | ||
| 736 | Unregister(surface); | ||
| 737 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 738 | } | ||
| 739 | return std::nullopt; | ||
| 740 | } | ||
| 741 | if (surface->GetCpuAddr() != cpu_addr) { | ||
| 742 | continue; | ||
| 743 | } | ||
| 744 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { | ||
| 745 | return std::make_pair(surface, surface->GetMainView()); | ||
| 746 | } | ||
| 747 | } | ||
| 748 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 749 | } | ||
| 750 | 687 | ||
| 751 | if (params.num_levels > 1) { | 688 | runtime.InvalidateColorBuffer(color_buffer, index); |
| 752 | // We can't handle mipmaps in 3D textures yet; better to fall back to the LLE approach | 689 | } |
| 753 | return std::nullopt; | ||
| 754 | } | ||
| 755 | 690 | ||
| 756 | if (overlaps.size() == 1) { | 691 | template <class P> |
| 757 | const auto& surface = overlaps[0]; | 692 | void TextureCache<P>::InvalidateDepthBuffer() { |
| 758 | const SurfaceParams& overlap_params = surface->GetSurfaceParams(); | 693 | ImageViewId& depth_buffer_id = render_targets.depth_buffer_id; |
| 759 | // Don't attempt to render to textures with more than one level for now | 694 | depth_buffer_id = FindDepthBuffer(false); |
| 760 | // The texture has to be to the right of or at the same address if we want to render to it | 695 | if (!depth_buffer_id) { |
| 761 | if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) { | 696 | LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer"); |
| 762 | const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr()); | 697 | return; |
| 763 | const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); | 698 | } |
| 764 | if (slice < overlap_params.depth) { | 699 | // When invalidating the depth buffer, the old contents are no longer relevant |
| 765 | auto view = surface->Emplace3DView(slice, params.depth, 0, 1); | 700 | ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id]; |
| 766 | return std::make_pair(std::move(surface), std::move(view)); | 701 | image.flags &= ~ImageFlagBits::CpuModified; |
| 767 | } | 702 | image.flags &= ~ImageFlagBits::GpuModified; |
| 768 | } | ||
| 769 | } | ||
| 770 | 703 | ||
| 771 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | 704 | ImageView& depth_buffer = slot_image_views[depth_buffer_id]; |
| 772 | bool modified = false; | 705 | runtime.InvalidateDepthBuffer(depth_buffer); |
| 706 | } | ||
| 773 | 707 | ||
| 774 | for (auto& surface : overlaps) { | 708 | template <class P> |
| 775 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | 709 | typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) { |
| 776 | if (src_params.target != SurfaceTarget::Texture2D || | 710 | // TODO: Properly implement this |
| 777 | src_params.height != params.height || | 711 | const auto it = page_table.find(cpu_addr >> PAGE_SHIFT); |
| 778 | src_params.block_depth != params.block_depth || | 712 | if (it == page_table.end()) { |
| 779 | src_params.block_height != params.block_height) { | 713 | return nullptr; |
| 780 | return std::nullopt; | 714 | } |
| 781 | } | 715 | const auto& image_ids = it->second; |
| 782 | modified |= surface->IsModified(); | 716 | for (const ImageId image_id : image_ids) { |
| 783 | 717 | const ImageBase& image = slot_images[image_id]; | |
| 784 | const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); | 718 | if (image.cpu_addr != cpu_addr) { |
| 785 | const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); | 719 | continue; |
| 786 | const u32 width = params.width; | ||
| 787 | const u32 height = params.height; | ||
| 788 | const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); | ||
| 789 | TryCopyImage(surface, new_surface, copy_params); | ||
| 790 | } | 720 | } |
| 791 | for (const auto& surface : overlaps) { | 721 | if (image.image_view_ids.empty()) { |
| 792 | Unregister(surface); | 722 | continue; |
| 793 | } | 723 | } |
| 794 | new_surface->MarkAsModified(modified, Tick()); | 724 | return &slot_image_views[image.image_view_ids.at(0)]; |
| 795 | Register(new_surface); | 725 | } |
| 796 | 726 | return nullptr; | |
| 797 | TView view = new_surface->GetMainView(); | 727 | } |
| 798 | return std::make_pair(std::move(new_surface), std::move(view)); | ||
| 799 | } | ||
| 800 | |||
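The slice lookup in Manage3DSurfaces turns a byte offset into a Z coordinate via GetBlockOffsetXYZ. A simplified sketch under the assumption of a plain linear layout; the real routine works on block-linear coordinates, so treat this as a model only:

```cpp
#include <cstdint>
#include <tuple>

// Simplified stand-in for SurfaceParams::GetBlockOffsetXYZ assuming a linear
// layout: the Z slice is the offset divided by the byte size of one slice.
std::tuple<uint32_t, uint32_t, uint32_t> BlockOffsetXYZ(uint32_t offset, uint32_t width,
                                                        uint32_t height,
                                                        uint32_t bytes_per_pixel) {
    const uint32_t row_bytes = width * bytes_per_pixel;
    const uint32_t slice_bytes = row_bytes * height;
    const uint32_t z = offset / slice_bytes;
    const uint32_t y = (offset % slice_bytes) / row_bytes;
    const uint32_t x = (offset % row_bytes) / bytes_per_pixel;
    return {x, y, z};
}
```

Manage3DSurfaces only consumes the Z component (`std::get<2>`), rejecting the overlap when the computed slice falls outside the 3D texture's depth.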
| 801 | /** | ||
| 802 | * Gets the starting address and parameters of a candidate surface and tries | ||
| 803 | * to find a matching surface within the cache. This is done in 3 big steps: | ||
| 804 | * | ||
| 805 | * 1. Check the 1st Level Cache in order to find an exact match; if we fail, we move to step 2. | ||
| 806 | * | ||
| 807 | * 2. Check if there are any overlaps at all; if there are none, we just load the texture from | ||
| 808 | * memory, else we move to step 3. | ||
| 809 | * | ||
| 810 | * 3. Consists of figuring out the relationship between the candidate texture and the | ||
| 811 | * overlaps. We divide the scenarios depending on whether there's 1 or many overlaps. If | ||
| 812 | * there's many, we just try to reconstruct a new surface out of them based on the | ||
| 813 | * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we | ||
| 814 | * have to check if the candidate is a view (layer/mipmap) of the overlap or if the | ||
| 815 | * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct | ||
| 816 | * a new surface. | ||
| 817 | * | ||
| 818 | * @param gpu_addr The starting address of the candidate surface. | ||
| 819 | * @param params The parameters on the candidate surface. | ||
| 820 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | ||
| 821 | * left blank. | ||
| 822 | * @param is_render Whether or not the surface is a render target. | ||
| 823 | **/ | ||
| 824 | std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, | ||
| 825 | const SurfaceParams& params, bool preserve_contents, | ||
| 826 | bool is_render) { | ||
| 827 | // Step 1 | ||
| 828 | // Check Level 1 Cache for a fast structural match. If candidate surface | ||
| 829 | // matches at certain level we are pretty much done. | ||
| 830 | if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) { | ||
| 831 | TSurface& current_surface = iter->second; | ||
| 832 | const auto topological_result = current_surface->MatchesTopology(params); | ||
| 833 | if (topological_result != MatchTopologyResult::FullMatch) { | ||
| 834 | VectorSurface overlaps{current_surface}; | ||
| 835 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 836 | topological_result); | ||
| 837 | } | ||
| 838 | 728 | ||
| 839 | const auto struct_result = current_surface->MatchesStructure(params); | 729 | template <class P> |
| 840 | if (struct_result != MatchStructureResult::None) { | 730 | bool TextureCache<P>::HasUncommittedFlushes() const noexcept { |
| 841 | const auto& old_params = current_surface->GetSurfaceParams(); | 731 | return !uncommitted_downloads.empty(); |
| 842 | const bool not_3d = params.target != SurfaceTarget::Texture3D && | 732 | } |
| 843 | old_params.target != SurfaceTarget::Texture3D; | ||
| 844 | if (not_3d || current_surface->MatchTarget(params.target)) { | ||
| 845 | if (struct_result == MatchStructureResult::FullMatch) { | ||
| 846 | return ManageStructuralMatch(current_surface, params, is_render); | ||
| 847 | } else { | ||
| 848 | return RebuildSurface(current_surface, params, is_render); | ||
| 849 | } | ||
| 850 | } | ||
| 851 | } | ||
| 852 | } | ||
| 853 | 733 | ||
| 854 | // Step 2 | 734 | template <class P> |
| 855 | // Obtain all possible overlaps in the memory region | 735 | bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept { |
| 856 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); | 736 | return !committed_downloads.empty() && !committed_downloads.front().empty(); |
| 857 | auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)}; | 737 | } |
| 858 | 738 | ||
| 859 | // If none are found, we are done; we just create the surface and load it. | 740 | void TextureCache<P>::CommitAsyncFlushes() { |
| 860 | if (overlaps.empty()) { | 740 | void TextureCache<P>::CommitAsyncFlushes() { |
| 861 | return InitializeSurface(gpu_addr, params, preserve_contents); | 741 | // This is intentionally passing the value by copy |
| 862 | } | 742 | committed_downloads.push(uncommitted_downloads); |
| 743 | uncommitted_downloads.clear(); | ||
| 744 | } | ||
| 863 | 745 | ||
| 864 | // Step 3 | 746 | template <class P> |
| 865 | // Now we need to figure out the relationship between the texture and its overlaps. | 747 | void TextureCache<P>::PopAsyncFlushes() { |
| 866 | // We do a topological test to ensure we can find some relationship; if it fails, | 748 | if (committed_downloads.empty()) { |
| 867 | // we immediately recycle the texture. | 749 | return; |
| 868 | for (const auto& surface : overlaps) { | 750 | } |
| 869 | const auto topological_result = surface->MatchesTopology(params); | 751 | const std::span<const ImageId> download_ids = committed_downloads.front(); |
| 870 | if (topological_result != MatchTopologyResult::FullMatch) { | 752 | if (download_ids.empty()) { |
| 871 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 753 | committed_downloads.pop(); |
| 872 | topological_result); | 754 | return; |
| 873 | } | 755 | } |
| 874 | } | 756 | size_t total_size_bytes = 0; |
| 757 | for (const ImageId image_id : download_ids) { | ||
| 758 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | ||
| 759 | } | ||
| 760 | auto download_map = runtime.MapDownloadBuffer(total_size_bytes); | ||
| 761 | size_t buffer_offset = 0; | ||
| 762 | for (const ImageId image_id : download_ids) { | ||
| 763 | Image& image = slot_images[image_id]; | ||
| 764 | const auto copies = FullDownloadCopies(image.info); | ||
| 765 | image.DownloadMemory(download_map, buffer_offset, copies); | ||
| 766 | buffer_offset += image.unswizzled_size_bytes; | ||
| 767 | } | ||
| 768 | // Wait for downloads to finish | ||
| 769 | runtime.Finish(); | ||
| 770 | |||
| 771 | buffer_offset = 0; | ||
| 772 | const std::span<u8> download_span = download_map.Span(); | ||
| 773 | for (const ImageId image_id : download_ids) { | ||
| 774 | const ImageBase& image = slot_images[image_id]; | ||
| 775 | const auto copies = FullDownloadCopies(image.info); | ||
| 776 | const std::span<u8> image_download_span = download_span.subspan(buffer_offset); | ||
| 777 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); | ||
| 778 | buffer_offset += image.unswizzled_size_bytes; | ||
| 779 | } | ||
| 780 | committed_downloads.pop(); | ||
| 781 | } | ||
| 875 | 782 | ||
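PopAsyncFlushes above makes two passes over the committed batch: first it enqueues every image's download into one staging buffer, then a single runtime.Finish() waits once for the whole batch before the results are swizzled back to guest memory. The offset layout can be sketched on its own; FakeImage and BatchOffsets are illustrative names:

```cpp
#include <cstddef>
#include <span>
#include <vector>

struct FakeImage {
    size_t unswizzled_size_bytes;
};

// Assign each image a contiguous region of a single staging buffer, mirroring
// how PopAsyncFlushes advances buffer_offset. One fence then covers the batch.
std::vector<size_t> BatchOffsets(std::span<const FakeImage> images) {
    std::vector<size_t> offsets;
    offsets.reserve(images.size());
    size_t offset = 0;
    for (const FakeImage& image : images) {
        offsets.push_back(offset);
        offset += image.unswizzled_size_bytes;
    }
    return offsets;
}
```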
| 876 | // Manage 3D textures | 783 | template <class P> |
| 877 | if (params.block_depth > 0) { | 784 | bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { |
| 878 | auto surface = | 785 | bool is_modified = false; |
| 879 | Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); | 786 | ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { |
| 880 | if (surface) { | 787 | if (False(image.flags & ImageFlagBits::GpuModified)) { |
| 881 | return *surface; | 788 | return false; |
| 882 | } | ||
| 883 | } | 789 | } |
| 790 | is_modified = true; | ||
| 791 | return true; | ||
| 792 | }); | ||
| 793 | return is_modified; | ||
| 794 | } | ||
| 884 | 795 | ||
| 885 | // Split cases between 1 overlap or many. | 796 | template <class P> |
| 886 | if (overlaps.size() == 1) { | 797 | void TextureCache<P>::RefreshContents(Image& image) { |
| 887 | TSurface current_surface = overlaps[0]; | 798 | if (False(image.flags & ImageFlagBits::CpuModified)) { |
| 888 | // First check if the surface is within the overlap. If not, it means one of | 799 | // Only upload modified images |
| 889 | // two things: either the candidate surface is a supertexture of the overlap, | 800 | return; |
| 890 | // or they don't match in any known way. | 801 | } |
| 891 | if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { | 802 | image.flags &= ~ImageFlagBits::CpuModified; |
| 892 | const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr); | 803 | TrackImage(image); |
| 893 | if (view) { | ||
| 894 | return *view; | ||
| 895 | } | ||
| 896 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 897 | MatchTopologyResult::FullMatch); | ||
| 898 | } | ||
| 899 | // Now we check if the candidate is a mipmap/layer of the overlap | ||
| 900 | std::optional<TView> view = | ||
| 901 | current_surface->EmplaceView(params, gpu_addr, candidate_size); | ||
| 902 | if (view) { | ||
| 903 | const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); | ||
| 904 | if (is_mirage) { | ||
| 905 | // On a mirage view, we need to recreate the surface under this new view | ||
| 906 | // and then obtain a view again. | ||
| 907 | SurfaceParams new_params = current_surface->GetSurfaceParams(); | ||
| 908 | const u32 wh = SurfaceParams::ConvertWidth( | ||
| 909 | new_params.width, new_params.pixel_format, params.pixel_format); | ||
| 910 | const u32 hh = SurfaceParams::ConvertHeight( | ||
| 911 | new_params.height, new_params.pixel_format, params.pixel_format); | ||
| 912 | new_params.width = wh; | ||
| 913 | new_params.height = hh; | ||
| 914 | new_params.pixel_format = params.pixel_format; | ||
| 915 | std::pair<TSurface, TView> pair = | ||
| 916 | RebuildSurface(current_surface, new_params, is_render); | ||
| 917 | std::optional<TView> mirage_view = | ||
| 918 | pair.first->EmplaceView(params, gpu_addr, candidate_size); | ||
| 919 | if (mirage_view) | ||
| 920 | return {pair.first, *mirage_view}; | ||
| 921 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 922 | MatchTopologyResult::FullMatch); | ||
| 923 | } | ||
| 924 | return {current_surface, *view}; | ||
| 925 | } | ||
| 926 | } else { | ||
| 927 | // If there are many overlaps, odds are they are subtextures of the candidate | ||
| 928 | // surface. We try to construct a new surface based on the candidate parameters, | ||
| 929 | // using the overlaps. If a single overlap fails, this will fail. | ||
| 930 | std::optional<std::pair<TSurface, TView>> view = | ||
| 931 | TryReconstructSurface(overlaps, params, gpu_addr); | ||
| 932 | if (view) { | ||
| 933 | return *view; | ||
| 934 | } | ||
| 935 | } | ||
| 936 | // We failed all the tests, recycle the overlaps into a new texture. | ||
| 937 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 938 | MatchTopologyResult::FullMatch); | ||
| 939 | } | ||
| 940 | |||
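Stripped of the texture-specific matching, GetSurface follows a classic three-step cache lookup. A schematic with placeholder types; SchematicCache, Overlaps, and Create are not the cache's real API, and the real code returns {surface, view} pairs with topological and structural checks at each step:

```cpp
#include <cstddef>
#include <unordered_map>
#include <vector>

struct Surface {};
using Addr = unsigned long long;

struct SchematicCache {
    std::unordered_map<Addr, Surface> l1_cache;

    std::vector<Surface*> Overlaps(Addr, size_t) {
        return {}; // placeholder: surfaces overlapping the region
    }
    Surface& Create(Addr addr) {
        return l1_cache[addr]; // placeholder: create, load, and register
    }

    Surface& Get(Addr addr, size_t size) {
        // Step 1: exact match in the first-level cache.
        if (const auto it = l1_cache.find(addr); it != l1_cache.end()) {
            return it->second;
        }
        // Step 2: no overlaps at all -> create and load a fresh surface.
        if (Overlaps(addr, size).empty()) {
            return Create(addr);
        }
        // Step 3: relate the candidate to its overlaps (view, reconstruction,
        // or recycle); elided here.
        return Create(addr);
    }
};
```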
| 941 | /** | ||
| 942 | * Gets the starting address and parameters of a candidate surface and tries to find a | ||
| 943 | * matching surface within the cache that's similar to it. If there are many textures | ||
| 944 | * or the texture found is entirely incompatible, it will fail. If no texture is found, the | ||
| 945 | * blit will be unsuccessful. | ||
| 946 | * | ||
| 947 | * @param gpu_addr The starting address of the candidate surface. | ||
| 948 | * @param params The parameters on the candidate surface. | ||
| 949 | **/ | ||
| 950 | Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { | ||
| 951 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 952 | |||
| 953 | if (!cpu_addr) { | ||
| 954 | Deduction result{}; | ||
| 955 | result.type = DeductionType::DeductionFailed; | ||
| 956 | return result; | ||
| 957 | } | ||
| 958 | 804 | ||
| 959 | if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { | 805 | if (image.info.num_samples > 1) { |
| 960 | TSurface& current_surface = iter->second; | 806 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); |
| 961 | const auto topological_result = current_surface->MatchesTopology(params); | 807 | return; |
| 962 | if (topological_result != MatchTopologyResult::FullMatch) { | 808 | } |
| 963 | Deduction result{}; | 809 | auto map = runtime.MapUploadBuffer(MapSizeBytes(image)); |
| 964 | result.type = DeductionType::DeductionFailed; | 810 | UploadImageContents(image, map, 0); |
| 965 | return result; | 811 | runtime.InsertUploadMemoryBarrier(); |
| 966 | } | 812 | } |
| 967 | const auto struct_result = current_surface->MatchesStructure(params); | ||
| 968 | if (struct_result != MatchStructureResult::None && | ||
| 969 | current_surface->MatchTarget(params.target)) { | ||
| 970 | Deduction result{}; | ||
| 971 | result.type = DeductionType::DeductionComplete; | ||
| 972 | result.surface = current_surface; | ||
| 973 | return result; | ||
| 974 | } | ||
| 975 | } | ||
| 976 | 813 | ||
| 977 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); | 814 | template <class P> |
| 978 | auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; | 815 | template <typename MapBuffer> |
| 816 | void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { | ||
| 817 | const std::span<u8> mapped_span = map.Span().subspan(buffer_offset); | ||
| 818 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 819 | |||
| 820 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | ||
| 821 | gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); | ||
| 822 | const auto uploads = FullUploadSwizzles(image.info); | ||
| 823 | runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); | ||
| 824 | } else if (True(image.flags & ImageFlagBits::Converted)) { | ||
| 825 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); | ||
| 826 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); | ||
| 827 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); | ||
| 828 | image.UploadMemory(map, buffer_offset, copies); | ||
| 829 | } else if (image.info.type == ImageType::Buffer) { | ||
| 830 | const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; | ||
| 831 | image.UploadMemory(map, buffer_offset, copies); | ||
| 832 | } else { | ||
| 833 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); | ||
| 834 | image.UploadMemory(map, buffer_offset, copies); | ||
| 835 | } | ||
| 836 | } | ||
| 979 | 837 | ||
| 980 | if (overlaps.empty()) { | 838 | template <class P> |
| 981 | Deduction result{}; | 839 | ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { |
| 982 | result.type = DeductionType::DeductionIncomplete; | 840 | if (!IsValidAddress(gpu_memory, config)) { |
| 983 | return result; | 841 | return NULL_IMAGE_VIEW_ID; |
| 984 | } | 842 | } |
| 843 | const auto [pair, is_new] = image_views.try_emplace(config); | ||
| 844 | ImageViewId& image_view_id = pair->second; | ||
| 845 | if (is_new) { | ||
| 846 | image_view_id = CreateImageView(config); | ||
| 847 | } | ||
| 848 | return image_view_id; | ||
| 849 | } | ||
| 985 | 850 | ||
| 986 | if (overlaps.size() > 1) { | 851 | template <class P> |
| 987 | Deduction result{}; | 852 | ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { |
| 988 | result.type = DeductionType::DeductionFailed; | 853 | const ImageInfo info(config); |
| 989 | return result; | 854 | const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; |
| 990 | } else { | 855 | const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); |
| 991 | Deduction result{}; | 856 | if (!image_id) { |
| 992 | result.type = DeductionType::DeductionComplete; | 857 | return NULL_IMAGE_VIEW_ID; |
| 993 | result.surface = overlaps[0]; | ||
| 994 | return result; | ||
| 995 | } | ||
| 996 | } | 858 | } |
| 859 | ImageBase& image = slot_images[image_id]; | ||
| 860 | const SubresourceBase base = image.TryFindBase(config.Address()).value(); | ||
| 861 | ASSERT(base.level == 0); | ||
| 862 | const ImageViewInfo view_info(config, base.layer); | ||
| 863 | const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); | ||
| 864 | ImageViewBase& image_view = slot_image_views[image_view_id]; | ||
| 865 | image_view.flags |= ImageViewFlagBits::Strong; | ||
| 866 | image.flags |= ImageFlagBits::Strong; | ||
| 867 | return image_view_id; | ||
| 868 | } | ||
| 997 | 869 | ||
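Note the address arithmetic in CreateImageView: the TIC descriptor points at a particular layer, so the image's base GPU address is recovered by subtracting BaseLayer() * layer_stride. A tiny worked check with made-up numbers:

```cpp
#include <cassert>
#include <cstdint>

int main() {
    // Hypothetical descriptor: it addresses layer 3 of an array image whose
    // layers are 0x10000 bytes apart.
    const uint64_t tic_address = 0x20030000;
    const uint64_t base_layer = 3;
    const uint64_t layer_stride = 0x10000;
    // Same computation as CreateImageView's image_gpu_addr.
    const uint64_t image_gpu_addr = tic_address - base_layer * layer_stride;
    assert(image_gpu_addr == 0x20000000);
    return 0;
}
```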
| 998 | /** | 870 | template <class P> |
| 999 | * Gets a null surface based on a target texture. | 871 | ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1000 | * @param target The target of the null surface. | 872 | RelaxedOptions options) { |
| 1001 | */ | 873 | if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { |
| 1002 | TView GetNullSurface(SurfaceTarget target) { | 874 | return image_id; |
| 1003 | const u32 i_target = static_cast<u32>(target); | 875 | } |
| 1004 | if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) { | 876 | return InsertImage(info, gpu_addr, options); |
| 1005 | return it->second->GetMainView(); | 877 | } |
| 1006 | } | 878 | |
| 1007 | SurfaceParams params{}; | 879 | template <class P> |
| 1008 | params.target = target; | 880 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1009 | params.is_tiled = false; | 881 | RelaxedOptions options) { |
| 1010 | params.srgb_conversion = false; | 882 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |
| 1011 | params.is_layered = | 883 | if (!cpu_addr) { |
| 1012 | target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray || | 884 | return ImageId{}; |
| 1013 | target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray; | 885 | } |
| 1014 | params.block_width = 0; | 886 | ImageId image_id; |
| 1015 | params.block_height = 0; | 887 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { |
| 1016 | params.block_depth = 0; | 888 | if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { |
| 1017 | params.tile_width_spacing = 1; | 889 | const bool strict_size = False(options & RelaxedOptions::Size) && |
| 1018 | params.width = 1; | 890 | True(existing_image.flags & ImageFlagBits::Strong); |
| 1019 | params.height = 1; | 891 | const ImageInfo& existing = existing_image.info; |
| 1020 | params.depth = 1; | 892 | if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && |
| 1021 | if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) { | 893 | existing.pitch == info.pitch && |
| 1022 | params.depth = 6; | 894 | IsPitchLinearSameSize(existing, info, strict_size) && |
| 1023 | } | 895 | IsViewCompatible(existing.format, info.format)) { |
| 1024 | params.pitch = 4; | 896 | image_id = existing_image_id; |
| 1025 | params.num_levels = 1; | 897 | return true; |
| 1026 | params.emulated_levels = 1; | 898 | } |
| 1027 | params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM; | 899 | } else if (IsSubresource(info, existing_image, gpu_addr, options)) { |
| 1028 | params.type = VideoCore::Surface::SurfaceType::ColorTexture; | 900 | image_id = existing_image_id; |
| 1029 | auto surface = CreateSurface(0ULL, params); | 901 | return true; |
| 1030 | invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); | ||
| 1031 | surface->UploadTexture(invalid_memory); | ||
| 1032 | surface->MarkAsModified(false, Tick()); | ||
| 1033 | invalid_cache.emplace(i_target, surface); | ||
| 1034 | return surface->GetMainView(); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | /** | ||
| 1038 | * Gets a source and destination starting address and parameters, | ||
| 1039 | * and tries to deduce whether they are supposed to be depth textures. If so, their | ||
| 1040 | * parameters are modified and fixed accordingly. | ||
| 1041 | * | ||
| 1042 | * @param src_params The parameters of the candidate surface. | ||
| 1043 | * @param dst_params The parameters of the destination surface. | ||
| 1044 | * @param src_gpu_addr The starting address of the candidate surface. | ||
| 1045 | * @param dst_gpu_addr The starting address of the destination surface. | ||
| 1046 | **/ | ||
| 1047 | void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, | ||
| 1048 | const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { | ||
| 1049 | auto deduced_src = DeduceSurface(src_gpu_addr, src_params); | ||
| 1050 | auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params); | ||
| 1051 | if (deduced_src.Failed() || deduced_dst.Failed()) { | ||
| 1052 | return; | ||
| 1053 | } | 902 | } |
| 903 | return false; | ||
| 904 | }; | ||
| 905 | ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); | ||
| 906 | return image_id; | ||
| 907 | } | ||
| 1054 | 908 | ||
| 1055 | const bool incomplete_src = deduced_src.Incomplete(); | 909 | template <class P> |
| 1056 | const bool incomplete_dst = deduced_dst.Incomplete(); | 910 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 911 | RelaxedOptions options) { | ||
| 912 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 913 | ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); | ||
| 914 | const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); | ||
| 915 | const Image& image = slot_images[image_id]; | ||
| 916 | // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different | ||
| 917 | const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); | ||
| 918 | if (is_new) { | ||
| 919 | it->second = slot_image_allocs.insert(); | ||
| 920 | } | ||
| 921 | slot_image_allocs[it->second].images.push_back(image_id); | ||
| 922 | return image_id; | ||
| 923 | } | ||
| 1057 | 924 | ||
| 1058 | if (incomplete_src && incomplete_dst) { | 925 | template <class P> |
| 926 | ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { | ||
| 927 | ImageInfo new_info = info; | ||
| 928 | const size_t size_bytes = CalculateGuestSizeInBytes(new_info); | ||
| 929 | std::vector<ImageId> overlap_ids; | ||
| 930 | std::vector<ImageId> left_aliased_ids; | ||
| 931 | std::vector<ImageId> right_aliased_ids; | ||
| 932 | ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { | ||
| 933 | if (info.type != overlap.info.type) { | ||
| 1059 | return; | 934 | return; |
| 1060 | } | 935 | } |
| 1061 | 936 | if (info.type == ImageType::Linear) { | |
| 1062 | const bool any_incomplete = incomplete_src || incomplete_dst; | 937 | if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { |
| 1063 | 938 | // Alias linear images with the same pitch | |
| 1064 | if (!any_incomplete) { | 939 | left_aliased_ids.push_back(overlap_id); |
| 1065 | if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) { | ||
| 1066 | return; | ||
| 1067 | } | ||
| 1068 | } else { | ||
| 1069 | if (incomplete_src && !(deduced_dst.IsDepth())) { | ||
| 1070 | return; | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | if (incomplete_dst && !(deduced_src.IsDepth())) { | ||
| 1074 | return; | ||
| 1075 | } | 940 | } |
| 941 | return; | ||
| 942 | } | ||
| 943 | const auto solution = ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, true); | ||
| 944 | if (solution) { | ||
| 945 | gpu_addr = solution->gpu_addr; | ||
| 946 | cpu_addr = solution->cpu_addr; | ||
| 947 | new_info.resources = solution->resources; | ||
| 948 | overlap_ids.push_back(overlap_id); | ||
| 949 | return; | ||
| 950 | } | ||
| 951 | static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; | ||
| 952 | const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); | ||
| 953 | if (IsSubresource(new_info, overlap, gpu_addr, options)) { | ||
| 954 | left_aliased_ids.push_back(overlap_id); | ||
| 955 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options)) { | ||
| 956 | right_aliased_ids.push_back(overlap_id); | ||
| 1076 | } | 957 | } |
| 958 | }); | ||
| 959 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); | ||
| 960 | Image& new_image = slot_images[new_image_id]; | ||
| 1077 | 961 | ||
| 1078 | const auto inherit_format = [](SurfaceParams& to, TSurface from) { | 962 | // TODO: Only upload what we need |
| 1079 | const SurfaceParams& params = from->GetSurfaceParams(); | 963 | RefreshContents(new_image); |
| 1080 | to.pixel_format = params.pixel_format; | 964 | |
| 1081 | to.type = params.type; | 965 | for (const ImageId overlap_id : overlap_ids) { |
| 1082 | }; | 966 | Image& overlap = slot_images[overlap_id]; |
| 1083 | // Now we got the cases where one or both is Depth and the other is not known | 967 | if (overlap.info.num_samples != new_image.info.num_samples) { |
| 1084 | if (!incomplete_src) { | 968 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); |
| 1085 | inherit_format(src_params, deduced_src.surface); | ||
| 1086 | } else { | 969 | } else { |
| 1087 | inherit_format(src_params, deduced_dst.surface); | 970 | const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); |
| 971 | const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); | ||
| 972 | runtime.CopyImage(new_image, overlap, copies); | ||
| 1088 | } | 973 | } |
| 1089 | if (!incomplete_dst) { | 974 | if (True(overlap.flags & ImageFlagBits::Tracked)) { |
| 1090 | inherit_format(dst_params, deduced_dst.surface); | 975 | UntrackImage(overlap); |
| 1091 | } else { | ||
| 1092 | inherit_format(dst_params, deduced_src.surface); | ||
| 1093 | } | 976 | } |
| 977 | UnregisterImage(overlap_id); | ||
| 978 | DeleteImage(overlap_id); | ||
| 979 | } | ||
| 980 | ImageBase& new_image_base = new_image; | ||
| 981 | for (const ImageId aliased_id : right_aliased_ids) { | ||
| 982 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 983 | AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); | ||
| 984 | } | ||
| 985 | for (const ImageId aliased_id : left_aliased_ids) { | ||
| 986 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 987 | AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); | ||
| 1094 | } | 988 | } |
| 989 | RegisterImage(new_image_id); | ||
| 990 | return new_image_id; | ||
| 991 | } | ||
| 1095 | 992 | ||
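JoinImages sorts every overlap into one of three buckets: absorbed into the new image (copied over, then deleted), left-aliased (the overlap is a subresource of the new image), or right-aliased (the new image is a subresource of the overlap). The decision with the predicates abstracted into flags; Classify is an illustrative name standing in for ResolveOverlap() succeeding and the two IsSubresource() directions:

```cpp
// Schematic of JoinImages' per-overlap decision.
enum class OverlapAction { Absorb, AliasLeft, AliasRight, Ignore };

constexpr OverlapAction Classify(bool resolvable, bool overlap_inside_new,
                                 bool new_inside_overlap) {
    if (resolvable) {
        return OverlapAction::Absorb; // copy into the new image, then delete
    }
    if (overlap_inside_new) {
        return OverlapAction::AliasLeft;
    }
    if (new_inside_overlap) {
        return OverlapAction::AliasRight;
    }
    return OverlapAction::Ignore;
}

static_assert(Classify(true, true, false) == OverlapAction::Absorb);
static_assert(Classify(false, false, true) == OverlapAction::AliasRight);
```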
| 1096 | std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, | 993 | template <class P> |
| 1097 | bool preserve_contents) { | 994 | typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( |
| 1098 | auto new_surface{GetUncachedSurface(gpu_addr, params)}; | 995 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { |
| 1099 | Register(new_surface); | 996 | static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; |
| 1100 | if (preserve_contents) { | 997 | const GPUVAddr dst_addr = dst.Address(); |
| 1101 | LoadSurface(new_surface); | 998 | const GPUVAddr src_addr = src.Address(); |
| 1102 | } | 999 | ImageInfo dst_info(dst); |
| 1103 | return {new_surface, new_surface->GetMainView()}; | 1000 | ImageInfo src_info(src); |
| 1001 | ImageId dst_id; | ||
| 1002 | ImageId src_id; | ||
| 1003 | do { | ||
| 1004 | has_deleted_images = false; | ||
| 1005 | dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); | ||
| 1006 | src_id = FindImage(src_info, src_addr, FIND_OPTIONS); | ||
| 1007 | const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; | ||
| 1008 | const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; | ||
| 1009 | DeduceBlitImages(dst_info, src_info, dst_image, src_image); | ||
| 1010 | if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { | ||
| 1011 | continue; | ||
| 1012 | } | ||
| 1013 | if (!dst_id) { | ||
| 1014 | dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); | ||
| 1015 | } | ||
| 1016 | if (!src_id) { | ||
| 1017 | src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); | ||
| 1018 | } | ||
| 1019 | } while (has_deleted_images); | ||
| 1020 | return BlitImages{ | ||
| 1021 | .dst_id = dst_id, | ||
| 1022 | .src_id = src_id, | ||
| 1023 | .dst_format = dst_info.format, | ||
| 1024 | .src_format = src_info.format, | ||
| 1025 | }; | ||
| 1026 | } | ||
| 1027 | |||
| 1028 | template <class P> | ||
| 1029 | SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { | ||
| 1030 | if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { | ||
| 1031 | return NULL_SAMPLER_ID; | ||
| 1032 | } | ||
| 1033 | const auto [pair, is_new] = samplers.try_emplace(config); | ||
| 1034 | if (is_new) { | ||
| 1035 | pair->second = slot_samplers.insert(runtime, config); | ||
| 1104 | } | 1036 | } |
| 1037 | return pair->second; | ||
| 1038 | } | ||
| 1105 | 1039 | ||
| 1106 | void LoadSurface(const TSurface& surface) { | 1040 | template <class P> |
| 1107 | staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); | 1041 | ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { |
| 1108 | surface->LoadBuffer(gpu_memory, staging_cache); | 1042 | const auto& regs = maxwell3d.regs; |
| 1109 | surface->UploadTexture(staging_cache.GetBuffer(0)); | 1043 | if (index >= regs.rt_control.count) { |
| 1110 | surface->MarkAsModified(false, Tick()); | 1044 | return ImageViewId{}; |
| 1045 | } | ||
| 1046 | const auto& rt = regs.rt[index]; | ||
| 1047 | const GPUVAddr gpu_addr = rt.Address(); | ||
| 1048 | if (gpu_addr == 0) { | ||
| 1049 | return ImageViewId{}; | ||
| 1050 | } | ||
| 1051 | if (rt.format == Tegra::RenderTargetFormat::NONE) { | ||
| 1052 | return ImageViewId{}; | ||
| 1111 | } | 1053 | } |
| 1054 | const ImageInfo info(regs, index); | ||
| 1055 | return FindRenderTargetView(info, gpu_addr, is_clear); | ||
| 1056 | } | ||
| 1112 | 1057 | ||
| 1113 | void FlushSurface(const TSurface& surface) { | 1058 | template <class P> |
| 1114 | if (!surface->IsModified()) { | 1059 | ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { |
| 1115 | return; | 1060 | const auto& regs = maxwell3d.regs; |
| 1116 | } | 1061 | if (!regs.zeta_enable) { |
| 1117 | staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); | 1062 | return ImageViewId{}; |
| 1118 | surface->DownloadTexture(staging_cache.GetBuffer(0)); | 1063 | } |
| 1119 | surface->FlushBuffer(gpu_memory, staging_cache); | 1064 | const GPUVAddr gpu_addr = regs.zeta.Address(); |
| 1120 | surface->MarkAsModified(false, Tick()); | 1065 | if (gpu_addr == 0) { |
| 1121 | } | 1066 | return ImageViewId{}; |
| 1122 | |||
| 1123 | void RegisterInnerCache(TSurface& surface) { | ||
| 1124 | const VAddr cpu_addr = surface->GetCpuAddr(); | ||
| 1125 | VAddr start = cpu_addr >> registry_page_bits; | ||
| 1126 | const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; | ||
| 1127 | l1_cache[cpu_addr] = surface; | ||
| 1128 | while (start <= end) { | ||
| 1129 | registry[start].push_back(surface); | ||
| 1130 | start++; | ||
| 1131 | } | ||
| 1132 | } | 1067 | } |
| 1068 | const ImageInfo info(regs); | ||
| 1069 | return FindRenderTargetView(info, gpu_addr, is_clear); | ||
| 1070 | } | ||
| 1133 | 1071 | ||
| 1134 | void UnregisterInnerCache(TSurface& surface) { | 1072 | template <class P> |
| 1135 | const VAddr cpu_addr = surface->GetCpuAddr(); | 1073 | ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1136 | VAddr start = cpu_addr >> registry_page_bits; | 1074 | bool is_clear) { |
| 1137 | const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; | 1075 | const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; |
| 1138 | l1_cache.erase(cpu_addr); | 1076 | const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); |
| 1139 | while (start <= end) { | 1077 | if (!image_id) { |
| 1140 | auto& reg{registry[start]}; | 1078 | return NULL_IMAGE_VIEW_ID; |
| 1141 | reg.erase(std::find(reg.begin(), reg.end(), surface)); | 1079 | } |
| 1142 | start++; | 1080 | Image& image = slot_images[image_id]; |
| 1143 | } | 1081 | const ImageViewType view_type = RenderTargetImageViewType(info); |
| 1082 | SubresourceBase base; | ||
| 1083 | if (image.info.type == ImageType::Linear) { | ||
| 1084 | base = SubresourceBase{.level = 0, .layer = 0}; | ||
| 1085 | } else { | ||
| 1086 | base = image.TryFindBase(gpu_addr).value(); | ||
| 1144 | } | 1087 | } |
| 1088 | const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; | ||
| 1089 | const SubresourceRange range{ | ||
| 1090 | .base = base, | ||
| 1091 | .extent = {.levels = 1, .layers = layers}, | ||
| 1092 | }; | ||
| 1093 | return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); | ||
| 1094 | } | ||
| 1145 | 1095 | ||
| 1146 | VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { | 1096 | template <class P> |
| 1147 | if (size == 0) { | 1097 | template <typename Func> |
| 1148 | return {}; | 1098 | void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { |
| 1099 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 1100 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 1101 | boost::container::small_vector<ImageId, 32> images; | ||
| 1102 | ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { | ||
| 1103 | const auto it = page_table.find(page); | ||
| 1104 | if (it == page_table.end()) { | ||
| 1105 | if constexpr (BOOL_BREAK) { | ||
| 1106 | return false; | ||
| 1107 | } else { | ||
| 1108 | return; | ||
| 1109 | } | ||
| 1149 | } | 1110 | } |
| 1150 | const VAddr cpu_addr_end = cpu_addr + size; | 1111 | for (const ImageId image_id : it->second) { |
| 1151 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; | 1112 | Image& image = slot_images[image_id]; |
| 1152 | VectorSurface surfaces; | 1113 | if (True(image.flags & ImageFlagBits::Picked)) { |
| 1153 | for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { | ||
| 1154 | const auto it = registry.find(start); | ||
| 1155 | if (it == registry.end()) { | ||
| 1156 | continue; | 1114 | continue; |
| 1157 | } | 1115 | } |
| 1158 | for (auto& surface : it->second) { | 1116 | if (!image.Overlaps(cpu_addr, size)) { |
| 1159 | if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { | 1117 | continue; |
| 1160 | continue; | 1118 | } |
| 1119 | image.flags |= ImageFlagBits::Picked; | ||
| 1120 | images.push_back(image_id); | ||
| 1121 | if constexpr (BOOL_BREAK) { | ||
| 1122 | if (func(image_id, image)) { | ||
| 1123 | return true; | ||
| 1161 | } | 1124 | } |
| 1162 | surface->MarkAsPicked(true); | 1125 | } else { |
| 1163 | surfaces.push_back(surface); | 1126 | func(image_id, image); |
| 1164 | } | 1127 | } |
| 1165 | } | 1128 | } |
| 1166 | for (auto& surface : surfaces) { | 1129 | if constexpr (BOOL_BREAK) { |
| 1167 | surface->MarkAsPicked(false); | 1130 | return false; |
| 1168 | } | 1131 | } |
| 1169 | return surfaces; | 1132 | }); |
| 1133 | for (const ImageId image_id : images) { | ||
| 1134 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 1170 | } | 1135 | } |
| 1136 | } | ||
| 1171 | 1137 | ||
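Because an image can span several pages, a naive walk of the page table would visit it once per page; ForEachImageInRegion uses the Picked flag as a visited marker and clears it afterwards (the legacy GetSurfacesInRegion on the left does the same with MarkAsPicked). The guard in isolation, with simplified types:

```cpp
#include <cstdint>
#include <unordered_map>
#include <vector>

struct Image {
    bool picked = false;
};

// Simplified page table: page index -> images touching that page. An image
// spanning N pages appears in N buckets, so a visited flag is required to
// call the visitor exactly once per image.
using PageTable = std::unordered_map<uint64_t, std::vector<Image*>>;

template <typename Func>
void ForEachImageInPages(PageTable& table, uint64_t first_page, uint64_t last_page,
                         Func&& func) {
    std::vector<Image*> visited;
    for (uint64_t page = first_page; page <= last_page; ++page) {
        const auto it = table.find(page);
        if (it == table.end()) {
            continue;
        }
        for (Image* image : it->second) {
            if (image->picked) {
                continue; // already visited through another page
            }
            image->picked = true;
            visited.push_back(image);
            func(*image);
        }
    }
    // Clear the guard so the next query starts fresh.
    for (Image* image : visited) {
        image->picked = false;
    }
}
```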
| 1172 | void ReserveSurface(const SurfaceParams& params, TSurface surface) { | 1138 | template <class P> |
| 1173 | surface_reserve[params].push_back(std::move(surface)); | 1139 | ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { |
| 1140 | Image& image = slot_images[image_id]; | ||
| 1141 | if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { | ||
| 1142 | return image_view_id; | ||
| 1174 | } | 1143 | } |
| 1144 | const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); | ||
| 1145 | image.InsertView(info, image_view_id); | ||
| 1146 | return image_view_id; | ||
| 1147 | } | ||
| 1148 | |||
| 1149 | template <class P> | ||
| 1150 | void TextureCache<P>::RegisterImage(ImageId image_id) { | ||
| 1151 | ImageBase& image = slot_images[image_id]; | ||
| 1152 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), | ||
| 1153 | "Trying to register an already registered image"); | ||
| 1154 | image.flags |= ImageFlagBits::Registered; | ||
| 1155 | ForEachPage(image.cpu_addr, image.guest_size_bytes, | ||
| 1156 | [this, image_id](u64 page) { page_table[page].push_back(image_id); }); | ||
| 1157 | } | ||
| 1175 | 1158 | ||
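RegisterImage and UnregisterImage both delegate the page-range walk to ForEachPage. A plausible sketch of that helper, assuming an illustrative 1 MiB page size; the cache's actual PAGE_SHIFT may differ:

```cpp
#include <cstddef>
#include <cstdint>

// Illustrative page size; the cache's real PAGE_SHIFT may differ.
constexpr uint64_t PAGE_SHIFT = 20; // 1 MiB pages

// Sketch of ForEachPage: invoke func for every page index overlapped by
// [addr, addr + size). RegisterImage uses this to append the image id to
// page_table[page]; UnregisterImage uses it to remove the id again.
template <typename Func>
void ForEachPage(uint64_t addr, size_t size, Func&& func) {
    const uint64_t first = addr >> PAGE_SHIFT;
    const uint64_t last = (addr + size - 1) >> PAGE_SHIFT;
    for (uint64_t page = first; page <= last; ++page) {
        func(page);
    }
}
```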
| 1176 | TSurface TryGetReservedSurface(const SurfaceParams& params) { | 1159 | template <class P> |
| 1177 | auto search{surface_reserve.find(params)}; | 1160 | void TextureCache<P>::UnregisterImage(ImageId image_id) { |
| 1178 | if (search == surface_reserve.end()) { | 1161 | Image& image = slot_images[image_id]; |
| 1179 | return {}; | 1162 | ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), |
| 1163 | "Trying to unregister an already registered image"); | ||
| 1164 | image.flags &= ~ImageFlagBits::Registered; | ||
| 1165 | ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { | ||
| 1166 | const auto page_it = page_table.find(page); | ||
| 1167 | if (page_it == page_table.end()) { | ||
| 1168 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_SHIFT); | ||
| 1169 | return; | ||
| 1180 | } | 1170 | } |
| 1181 | for (auto& surface : search->second) { | 1171 | std::vector<ImageId>& image_ids = page_it->second; |
| 1182 | if (!surface->IsRegistered()) { | 1172 | const auto vector_it = std::ranges::find(image_ids, image_id); |
| 1183 | return surface; | 1173 | if (vector_it == image_ids.end()) { |
| 1184 | } | 1174 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_SHIFT); |
| 1175 | return; | ||
| 1185 | } | 1176 | } |
| 1186 | return {}; | 1177 | image_ids.erase(vector_it); |
| 1187 | } | 1178 | }); |
| 1179 | } | ||
| 1188 | 1180 | ||
| 1189 | /// Try to do an image copy, logging when formats are incompatible. | 1181 | template <class P> |
| 1190 | void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) { | 1182 | void TextureCache<P>::TrackImage(ImageBase& image) { |
| 1191 | const SurfaceParams& src_params = src->GetSurfaceParams(); | 1183 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); |
| 1192 | const SurfaceParams& dst_params = dst->GetSurfaceParams(); | 1184 | image.flags |= ImageFlagBits::Tracked; |
| 1193 | if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { | 1185 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); |
| 1194 | LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", dst_params.pixel_format, | 1186 | } |
| 1195 | src_params.pixel_format); | 1187 | |
| 1196 | return; | 1188 | template <class P> |
| 1189 | void TextureCache<P>::UntrackImage(ImageBase& image) { | ||
| 1190 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); | ||
| 1191 | image.flags &= ~ImageFlagBits::Tracked; | ||
| 1192 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | ||
| 1193 | } | ||
| 1194 | |||
| 1195 | template <class P> | ||
| 1196 | void TextureCache<P>::DeleteImage(ImageId image_id) { | ||
| 1197 | ImageBase& image = slot_images[image_id]; | ||
| 1198 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 1199 | const auto alloc_it = image_allocs_table.find(gpu_addr); | ||
| 1200 | if (alloc_it == image_allocs_table.end()) { | ||
| 1201 | UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", | ||
| 1202 | gpu_addr); | ||
| 1203 | return; | ||
| 1204 | } | ||
| 1205 | const ImageAllocId alloc_id = alloc_it->second; | ||
| 1206 | std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images; | ||
| 1207 | const auto alloc_image_it = std::ranges::find(alloc_images, image_id); | ||
| 1208 | if (alloc_image_it == alloc_images.end()) { | ||
| 1209 | UNREACHABLE_MSG("Trying to delete an image that does not exist"); | ||
| 1210 | return; | ||
| 1211 | } | ||
| 1212 | ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); | ||
| 1213 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); | ||
| 1214 | |||
| 1215 | // Mark render targets as dirty | ||
| 1216 | auto& dirty = maxwell3d.dirty.flags; | ||
| 1217 | dirty[Dirty::RenderTargets] = true; | ||
| 1218 | dirty[Dirty::ZetaBuffer] = true; | ||
| 1219 | for (size_t rt = 0; rt < NUM_RT; ++rt) { | ||
| 1220 | dirty[Dirty::ColorBuffer0 + rt] = true; | ||
| 1221 | } | ||
| 1222 | const std::span<const ImageViewId> image_view_ids = image.image_view_ids; | ||
| 1223 | for (const ImageViewId image_view_id : image_view_ids) { | ||
| 1224 | std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); | ||
| 1225 | if (render_targets.depth_buffer_id == image_view_id) { | ||
| 1226 | render_targets.depth_buffer_id = ImageViewId{}; | ||
| 1197 | } | 1227 | } |
| 1198 | ImageCopy(src, dst, copy); | ||
| 1199 | } | 1228 | } |
| 1229 | RemoveImageViewReferences(image_view_ids); | ||
| 1230 | RemoveFramebuffers(image_view_ids); | ||
| 1231 | |||
| 1232 | for (const AliasedImage& alias : image.aliased_images) { | ||
| 1233 | ImageBase& other_image = slot_images[alias.id]; | ||
| 1234 | [[maybe_unused]] const size_t num_removed_aliases = | ||
| 1235 | std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { | ||
| 1236 | return other_alias.id == image_id; | ||
| 1237 | }); | ||
| 1238 | ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", | ||
| 1239 | num_removed_aliases); | ||
| 1240 | } | ||
| 1241 | for (const ImageViewId image_view_id : image_view_ids) { | ||
| 1242 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); | ||
| 1243 | slot_image_views.erase(image_view_id); | ||
| 1244 | } | ||
| 1245 | sentenced_images.Push(std::move(slot_images[image_id])); | ||
| 1246 | slot_images.erase(image_id); | ||
| 1200 | 1247 | ||
| 1201 | constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { | 1248 | alloc_images.erase(alloc_image_it); |
| 1202 | return siblings_table[static_cast<std::size_t>(format)]; | 1249 | if (alloc_images.empty()) { |
| 1250 | image_allocs_table.erase(alloc_it); | ||
| 1203 | } | 1251 | } |
| 1252 | if constexpr (ENABLE_VALIDATION) { | ||
| 1253 | std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); | ||
| 1254 | std::ranges::fill(compute_image_view_ids, CORRUPT_ID); | ||
| 1255 | } | ||
| 1256 | graphics_image_table.Invalidate(); | ||
| 1257 | compute_image_table.Invalidate(); | ||
| 1258 | has_deleted_images = true; | ||
| 1259 | } | ||
| 1204 | 1260 | ||
| 1205 | /// Returns true when the shader sampler entry is compatible with the TIC texture type. | 1261 | template <class P> |
| 1206 | static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type, | 1262 | void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) { |
| 1207 | const VideoCommon::Shader::Sampler& entry) { | 1263 | auto it = image_views.begin(); |
| 1208 | const auto shader_type = entry.type; | 1264 | while (it != image_views.end()) { |
| 1209 | switch (tic_type) { | 1265 | const auto found = std::ranges::find(removed_views, it->second); |
| 1210 | case Tegra::Texture::TextureType::Texture1D: | 1266 | if (found != removed_views.end()) { |
| 1211 | case Tegra::Texture::TextureType::Texture1DArray: | 1267 | it = image_views.erase(it); |
| 1212 | return shader_type == Tegra::Shader::TextureType::Texture1D; | 1268 | } else { |
| 1213 | case Tegra::Texture::TextureType::Texture1DBuffer: | 1269 | ++it; |
| 1214 | // TODO(Rodrigo): Assume as valid for now | ||
| 1215 | return true; | ||
| 1216 | case Tegra::Texture::TextureType::Texture2D: | ||
| 1217 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | ||
| 1218 | return shader_type == Tegra::Shader::TextureType::Texture2D; | ||
| 1219 | case Tegra::Texture::TextureType::Texture2DArray: | ||
| 1220 | return shader_type == Tegra::Shader::TextureType::Texture2D || | ||
| 1221 | shader_type == Tegra::Shader::TextureType::TextureCube; | ||
| 1222 | case Tegra::Texture::TextureType::Texture3D: | ||
| 1223 | return shader_type == Tegra::Shader::TextureType::Texture3D; | ||
| 1224 | case Tegra::Texture::TextureType::TextureCubeArray: | ||
| 1225 | case Tegra::Texture::TextureType::TextureCubemap: | ||
| 1226 | if (shader_type == Tegra::Shader::TextureType::TextureCube) { | ||
| 1227 | return true; | ||
| 1228 | } | ||
| 1229 | return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array; | ||
| 1230 | } | 1270 | } |
| 1231 | UNREACHABLE(); | ||
| 1232 | return true; | ||
| 1233 | } | 1271 | } |
| 1272 | } | ||
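The loop above erases every map entry whose view has been removed while walking the container. Under C++20 the same pass could be written with std::erase_if, as in this sketch; Key and Id are stand-ins for the cache's actual key and ImageViewId types:

    #include <algorithm>
    #include <span>
    #include <unordered_map>

    template <typename Key, typename Id>
    void RemoveReferences(std::unordered_map<Key, Id>& views, std::span<const Id> removed) {
        // std::erase_if hides the iterator bookkeeping of erase-while-iterating.
        std::erase_if(views, [removed](const auto& entry) {
            return std::ranges::find(removed, entry.second) != removed.end();
        });
    }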
| 1234 | 1273 | ||
| 1235 | struct FramebufferTargetInfo { | 1274 | template <class P> |
| 1236 | TSurface target; | 1275 | void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) { |
| 1237 | TView view; | 1276 | auto it = framebuffers.begin(); |
| 1238 | }; | 1277 | while (it != framebuffers.end()) { |
| 1239 | 1278 | if (it->first.Contains(removed_views)) { | |
| 1240 | void AsyncFlushSurface(TSurface& surface) { | 1279 | it = framebuffers.erase(it); |
| 1241 | if (!uncommitted_flushes) { | 1280 | } else { |
| 1242 | uncommitted_flushes = std::make_shared<std::list<TSurface>>(); | 1281 | ++it; |
| 1243 | } | 1282 | } |
| 1244 | uncommitted_flushes->push_back(surface); | ||
| 1245 | } | 1283 | } |
| 1284 | } | ||
| 1246 | 1285 | ||
| 1247 | VideoCore::RasterizerInterface& rasterizer; | 1286 | template <class P> |
| 1248 | Tegra::Engines::Maxwell3D& maxwell3d; | 1287 | void TextureCache<P>::MarkModification(ImageBase& image) noexcept { |
| 1249 | Tegra::MemoryManager& gpu_memory; | 1288 | image.flags |= ImageFlagBits::GpuModified; |
| 1250 | 1289 | image.modification_tick = ++modification_tick; | |
| 1251 | FormatLookupTable format_lookup_table; | 1290 | } |
| 1252 | FormatCompatibility format_compatibility; | ||
| 1253 | |||
| 1254 | u64 ticks{}; | ||
| 1255 | |||
| 1256 | // Guards the cache for protection conflicts. | ||
| 1257 | bool guard_render_targets{}; | ||
| 1258 | bool guard_samplers{}; | ||
| 1259 | |||
| 1260 | // The siblings table is for formats that can inter exchange with one another | ||
| 1261 | // without causing issues. This is only valid when a conflict occurs on a non | ||
| 1262 | // rendering use. | ||
| 1263 | std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table; | ||
| 1264 | |||
| 1265 | // The internal Cache is different for the Texture Cache. It's based on buckets | ||
| 1266 | // of 1MB. This fits better for the purpose of this cache as textures are normaly | ||
| 1267 | // large in size. | ||
| 1268 | static constexpr u64 registry_page_bits{20}; | ||
| 1269 | static constexpr u64 registry_page_size{1 << registry_page_bits}; | ||
| 1270 | std::unordered_map<VAddr, std::vector<TSurface>> registry; | ||
| 1271 | 1291 | ||
| 1272 | static constexpr u32 DEPTH_RT = 8; | 1292 | template <class P> |
| 1273 | static constexpr u32 NO_RT = 0xFFFFFFFF; | 1293 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { |
| 1294 | boost::container::small_vector<const AliasedImage*, 1> aliased_images; | ||
| 1295 | ImageBase& image = slot_images[image_id]; | ||
| 1296 | u64 most_recent_tick = image.modification_tick; | ||
| 1297 | for (const AliasedImage& aliased : image.aliased_images) { | ||
| 1298 | ImageBase& aliased_image = slot_images[aliased.id]; | ||
| 1299 | if (image.modification_tick < aliased_image.modification_tick) { | ||
| 1300 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); | ||
| 1301 | aliased_images.push_back(&aliased); | ||
| 1302 | } | ||
| 1303 | } | ||
| 1304 | if (aliased_images.empty()) { | ||
| 1305 | return; | ||
| 1306 | } | ||
| 1307 | image.modification_tick = most_recent_tick; | ||
| 1308 | std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { | ||
| 1309 | const ImageBase& lhs_image = slot_images[lhs->id]; | ||
| 1310 | const ImageBase& rhs_image = slot_images[rhs->id]; | ||
| 1311 | return lhs_image.modification_tick < rhs_image.modification_tick; | ||
| 1312 | }); | ||
| 1313 | for (const AliasedImage* const aliased : aliased_images) { | ||
| 1314 | CopyImage(image_id, aliased->id, aliased->copies); | ||
| 1315 | } | ||
| 1316 | } | ||
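An illustrative run of the pass above, with made-up ticks: image A was last written at tick 10 and overlaps aliases B (tick 12) and C (tick 15). Both are newer than A, so both are queued, sorted oldest-first so the newest data lands last:

    // CopyImage(A, B, ...);  applied first  (tick 12)
    // CopyImage(A, C, ...);  applied second (tick 15), wins on shared texels
    // A then takes tick 15, so later passes see it as up to date.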
| 1274 | 1317 | ||
| 1275 | // The L1 Cache is used for fast texture lookup before checking the overlaps | 1318 | template <class P> |
| 1276 | // This avoids calculating size and other stuffs. | 1319 | void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { |
| 1277 | std::unordered_map<VAddr, TSurface> l1_cache; | 1320 | Image& image = slot_images[image_id]; |
| 1321 | if (invalidate) { | ||
| 1322 | image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); | ||
| 1323 | if (False(image.flags & ImageFlagBits::Tracked)) { | ||
| 1324 | TrackImage(image); | ||
| 1325 | } | ||
| 1326 | } else { | ||
| 1327 | RefreshContents(image); | ||
| 1328 | SynchronizeAliases(image_id); | ||
| 1329 | } | ||
| 1330 | if (is_modification) { | ||
| 1331 | MarkModification(image); | ||
| 1332 | } | ||
| 1333 | image.frame_tick = frame_tick; | ||
| 1334 | } | ||
| 1278 | 1335 | ||
| 1279 | /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have | 1336 | template <class P> |
| 1280 | /// previously been used. This is to prevent surfaces from being constantly created and | 1337 | void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification, |
| 1281 | /// destroyed when used with different surface parameters. | 1338 | bool invalidate) { |
| 1282 | std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve; | 1339 | if (!image_view_id) { |
| 1283 | std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> | 1340 | return; |
| 1284 | render_targets; | 1341 | } |
| 1285 | FramebufferTargetInfo depth_buffer; | 1342 | const ImageViewBase& image_view = slot_image_views[image_view_id]; |
| 1343 | PrepareImage(image_view.image_id, is_modification, invalidate); | ||
| 1344 | } | ||
| 1286 | 1345 | ||
| 1287 | std::vector<TSurface> sampled_textures; | 1346 | template <class P> |
| 1347 | void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) { | ||
| 1348 | Image& dst = slot_images[dst_id]; | ||
| 1349 | Image& src = slot_images[src_id]; | ||
| 1350 | const auto dst_format_type = GetFormatType(dst.info.format); | ||
| 1351 | const auto src_format_type = GetFormatType(src.info.format); | ||
| 1352 | if (src_format_type == dst_format_type) { | ||
| 1353 | if constexpr (HAS_EMULATED_COPIES) { | ||
| 1354 | if (!runtime.CanImageBeCopied(dst, src)) { | ||
| 1355 | return runtime.EmulateCopyImage(dst, src, copies); | ||
| 1356 | } | ||
| 1357 | } | ||
| 1358 | return runtime.CopyImage(dst, src, copies); | ||
| 1359 | } | ||
| 1360 | UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); | ||
| 1361 | UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); | ||
| 1362 | for (const ImageCopy& copy : copies) { | ||
| 1363 | UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); | ||
| 1364 | UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); | ||
| 1365 | UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); | ||
| 1366 | UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); | ||
| 1367 | |||
| 1368 | const SubresourceBase dst_base{ | ||
| 1369 | .level = copy.dst_subresource.base_level, | ||
| 1370 | .layer = copy.dst_subresource.base_layer, | ||
| 1371 | }; | ||
| 1372 | const SubresourceBase src_base{ | ||
| 1373 | .level = copy.src_subresource.base_level, | ||
| 1374 | .layer = copy.src_subresource.base_layer, | ||
| 1375 | }; | ||
| 1376 | const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; | ||
| 1377 | const SubresourceExtent src_extent{.levels = 1, .layers = 1}; | ||
| 1378 | const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; | ||
| 1379 | const SubresourceRange src_range{.base = src_base, .extent = src_extent}; | ||
| 1380 | const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); | ||
| 1381 | const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); | ||
| 1382 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | ||
| 1383 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; | ||
| 1384 | const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); | ||
| 1385 | ImageView& dst_view = slot_image_views[dst_view_id]; | ||
| 1386 | ImageView& src_view = slot_image_views[src_view_id]; | ||
| 1387 | [[maybe_unused]] const Extent3D expected_size{ | ||
| 1388 | .width = std::min(dst_view.size.width, src_view.size.width), | ||
| 1389 | .height = std::min(dst_view.size.height, src_view.size.height), | ||
| 1390 | .depth = std::min(dst_view.size.depth, src_view.size.depth), | ||
| 1391 | }; | ||
| 1392 | UNIMPLEMENTED_IF(copy.extent != expected_size); | ||
| 1288 | 1393 | ||
| 1289 | /// This cache stores null surfaces in order to be used as a placeholder | 1394 | runtime.ConvertImage(dst_framebuffer, dst_view, src_view); |
| 1290 | /// for invalid texture calls. | 1395 | } |
| 1291 | std::unordered_map<u32, TSurface> invalid_cache; | 1396 | } |
| 1292 | std::vector<u8> invalid_memory; | ||
| 1293 | 1397 | ||
| 1294 | std::list<TSurface> marked_for_unregister; | 1398 | template <class P> |
| 1399 | void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { | ||
| 1400 | if (*old_id == new_id) { | ||
| 1401 | return; | ||
| 1402 | } | ||
| 1403 | if (*old_id) { | ||
| 1404 | const ImageViewBase& old_view = slot_image_views[*old_id]; | ||
| 1405 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { | ||
| 1406 | uncommitted_downloads.push_back(old_view.image_id); | ||
| 1407 | } | ||
| 1408 | } | ||
| 1409 | *old_id = new_id; | ||
| 1410 | } | ||
| 1295 | 1411 | ||
| 1296 | std::shared_ptr<std::list<TSurface>> uncommitted_flushes{}; | 1412 | template <class P> |
| 1297 | std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes; | 1413 | std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage( |
| 1414 | ImageId image_id, const ImageViewInfo& view_info) { | ||
| 1415 | const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); | ||
| 1416 | const ImageBase& image = slot_images[image_id]; | ||
| 1417 | const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; | ||
| 1418 | const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; | ||
| 1419 | const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; | ||
| 1420 | const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); | ||
| 1421 | const u32 num_samples = image.info.num_samples; | ||
| 1422 | const auto [samples_x, samples_y] = SamplesLog2(num_samples); | ||
| 1423 | const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ | ||
| 1424 | .color_buffer_ids = {color_view_id}, | ||
| 1425 | .depth_buffer_id = depth_view_id, | ||
| 1426 | .size = {extent.width >> samples_x, extent.height >> samples_y}, | ||
| 1427 | }); | ||
| 1428 | return {framebuffer_id, view_id}; | ||
| 1429 | } | ||
| 1298 | 1430 | ||
| 1299 | StagingCache staging_cache; | 1431 | template <class P> |
| 1300 | std::recursive_mutex mutex; | 1432 | bool TextureCache<P>::IsFullClear(ImageViewId id) { |
| 1301 | }; | 1433 | if (!id) { |
| 1434 | return true; | ||
| 1435 | } | ||
| 1436 | const ImageViewBase& image_view = slot_image_views[id]; | ||
| 1437 | const ImageBase& image = slot_images[image_view.image_id]; | ||
| 1438 | const Extent3D size = image_view.size; | ||
| 1439 | const auto& regs = maxwell3d.regs; | ||
| 1440 | const auto& scissor = regs.scissor_test[0]; | ||
| 1441 | if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { | ||
| 1442 | // Images with multiple resources can't be cleared in a single call | ||
| 1443 | return false; | ||
| 1444 | } | ||
| 1445 | if (regs.clear_flags.scissor == 0) { | ||
| 1446 | // If scissor testing is disabled, the clear is always full | ||
| 1447 | return true; | ||
| 1448 | } | ||
| 1449 | // Make sure the clear covers all texels in the subresource | ||
| 1450 | return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && | ||
| 1451 | scissor.max_y >= size.height; | ||
| 1452 | } | ||
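A hypothetical call site for this predicate (not the cache's actual caller): when the clear covers the whole subresource the previous contents are dead, so the caller can pass invalidate = true and skip refreshing them from guest memory first:

    const bool full_clear = IsFullClear(view_id);
    PrepareImageView(view_id, /*is_modification=*/true, /*invalidate=*/full_clear);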
| 1302 | 1453 | ||
| 1303 | } // namespace VideoCommon | 1454 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h new file mode 100644 index 000000000..2ad2d72a6 --- /dev/null +++ b/src/video_core/texture_cache/types.h | |||
| @@ -0,0 +1,140 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_funcs.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/texture_cache/slot_vector.h" | ||
| 10 | |||
| 11 | namespace VideoCommon { | ||
| 12 | |||
| 13 | constexpr size_t NUM_RT = 8; | ||
| 14 | constexpr size_t MAX_MIP_LEVELS = 14; | ||
| 15 | |||
| 16 | constexpr SlotId CORRUPT_ID{0xfffffffe}; | ||
| 17 | |||
| 18 | using ImageId = SlotId; | ||
| 19 | using ImageViewId = SlotId; | ||
| 20 | using ImageAllocId = SlotId; | ||
| 21 | using SamplerId = SlotId; | ||
| 22 | using FramebufferId = SlotId; | ||
| 23 | |||
| 24 | enum class ImageType : u32 { | ||
| 25 | e1D, | ||
| 26 | e2D, | ||
| 27 | e3D, | ||
| 28 | Linear, | ||
| 29 | Buffer, | ||
| 30 | }; | ||
| 31 | |||
| 32 | enum class ImageViewType : u32 { | ||
| 33 | e1D, | ||
| 34 | e2D, | ||
| 35 | Cube, | ||
| 36 | e3D, | ||
| 37 | e1DArray, | ||
| 38 | e2DArray, | ||
| 39 | CubeArray, | ||
| 40 | Rect, | ||
| 41 | Buffer, | ||
| 42 | }; | ||
| 43 | constexpr size_t NUM_IMAGE_VIEW_TYPES = 9; | ||
| 44 | |||
| 45 | enum class RelaxedOptions : u32 { | ||
| 46 | Size = 1 << 0, | ||
| 47 | Format = 1 << 1, | ||
| 48 | Samples = 1 << 2, | ||
| 49 | }; | ||
| 50 | DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) | ||
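DECLARE_ENUM_FLAG_OPERATORS generates the bitwise operators for the scoped enum, so options combine without casts. A small usage sketch, assuming True and False from common_funcs.h are constexpr as they are used elsewhere in the cache:

    constexpr RelaxedOptions options = RelaxedOptions::Size | RelaxedOptions::Samples;
    static_assert(True(options & RelaxedOptions::Size));
    static_assert(False(options & RelaxedOptions::Format));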
| 51 | |||
| 52 | struct Offset2D { | ||
| 53 | constexpr auto operator<=>(const Offset2D&) const noexcept = default; | ||
| 54 | |||
| 55 | s32 x; | ||
| 56 | s32 y; | ||
| 57 | }; | ||
| 58 | |||
| 59 | struct Offset3D { | ||
| 60 | constexpr auto operator<=>(const Offset3D&) const noexcept = default; | ||
| 61 | |||
| 62 | s32 x; | ||
| 63 | s32 y; | ||
| 64 | s32 z; | ||
| 65 | }; | ||
| 66 | |||
| 67 | struct Extent2D { | ||
| 68 | constexpr auto operator<=>(const Extent2D&) const noexcept = default; | ||
| 69 | |||
| 70 | u32 width; | ||
| 71 | u32 height; | ||
| 72 | }; | ||
| 73 | |||
| 74 | struct Extent3D { | ||
| 75 | constexpr auto operator<=>(const Extent3D&) const noexcept = default; | ||
| 76 | |||
| 77 | u32 width; | ||
| 78 | u32 height; | ||
| 79 | u32 depth; | ||
| 80 | }; | ||
| 81 | |||
| 82 | struct SubresourceLayers { | ||
| 83 | s32 base_level = 0; | ||
| 84 | s32 base_layer = 0; | ||
| 85 | s32 num_layers = 1; | ||
| 86 | }; | ||
| 87 | |||
| 88 | struct SubresourceBase { | ||
| 89 | constexpr auto operator<=>(const SubresourceBase&) const noexcept = default; | ||
| 90 | |||
| 91 | s32 level = 0; | ||
| 92 | s32 layer = 0; | ||
| 93 | }; | ||
| 94 | |||
| 95 | struct SubresourceExtent { | ||
| 96 | constexpr auto operator<=>(const SubresourceExtent&) const noexcept = default; | ||
| 97 | |||
| 98 | s32 levels = 1; | ||
| 99 | s32 layers = 1; | ||
| 100 | }; | ||
| 101 | |||
| 102 | struct SubresourceRange { | ||
| 103 | constexpr auto operator<=>(const SubresourceRange&) const noexcept = default; | ||
| 104 | |||
| 105 | SubresourceBase base; | ||
| 106 | SubresourceExtent extent; | ||
| 107 | }; | ||
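Defaulting operator<=> also declares a defaulted operator==, and both compare memberwise, which is what checks like copy.src_offset != Offset3D{} in the cache rely on. For instance:

    static_assert(Offset3D{0, 0, 0} == Offset3D{});
    static_assert(Extent3D{64, 64, 1} != Extent3D{32, 64, 1});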
| 108 | |||
| 109 | struct ImageCopy { | ||
| 110 | SubresourceLayers src_subresource; | ||
| 111 | SubresourceLayers dst_subresource; | ||
| 112 | Offset3D src_offset; | ||
| 113 | Offset3D dst_offset; | ||
| 114 | Extent3D extent; | ||
| 115 | }; | ||
| 116 | |||
| 117 | struct BufferImageCopy { | ||
| 118 | size_t buffer_offset; | ||
| 119 | size_t buffer_size; | ||
| 120 | u32 buffer_row_length; | ||
| 121 | u32 buffer_image_height; | ||
| 122 | SubresourceLayers image_subresource; | ||
| 123 | Offset3D image_offset; | ||
| 124 | Extent3D image_extent; | ||
| 125 | }; | ||
| 126 | |||
| 127 | struct BufferCopy { | ||
| 128 | size_t src_offset; | ||
| 129 | size_t dst_offset; | ||
| 130 | size_t size; | ||
| 131 | }; | ||
| 132 | |||
| 133 | struct SwizzleParameters { | ||
| 134 | Extent3D num_tiles; | ||
| 135 | Extent3D block; | ||
| 136 | size_t buffer_offset; | ||
| 137 | s32 level; | ||
| 138 | }; | ||
| 139 | |||
| 140 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp new file mode 100644 index 000000000..9ed1fc007 --- /dev/null +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -0,0 +1,1232 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | // This file contains code from Ryujinx | ||
| 6 | // A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx | ||
| 7 | // The sections using code from Ryujinx are marked with a link to the original version | ||
| 8 | |||
| 9 | // MIT License | ||
| 10 | // | ||
| 11 | // Copyright (c) Ryujinx Team and Contributors | ||
| 12 | // | ||
| 13 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and | ||
| 14 | // associated documentation files (the "Software"), to deal in the Software without restriction, | ||
| 15 | // including without limitation the rights to use, copy, modify, merge, publish, distribute, | ||
| 16 | // sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is | ||
| 17 | // furnished to do so, subject to the following conditions: | ||
| 18 | // | ||
| 19 | // The above copyright notice and this permission notice shall be included in all copies or | ||
| 20 | // substantial portions of the Software. | ||
| 21 | // | ||
| 22 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT | ||
| 23 | // NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| 24 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||
| 25 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 26 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| 27 | // | ||
| 28 | |||
| 29 | #include <algorithm> | ||
| 30 | #include <array> | ||
| 31 | #include <numeric> | ||
| 32 | #include <optional> | ||
| 33 | #include <span> | ||
| 34 | #include <vector> | ||
| 35 | |||
| 36 | #include "common/alignment.h" | ||
| 37 | #include "common/assert.h" | ||
| 38 | #include "common/bit_util.h" | ||
| 39 | #include "common/common_types.h" | ||
| 40 | #include "common/div_ceil.h" | ||
| 41 | #include "video_core/compatible_formats.h" | ||
| 42 | #include "video_core/engines/maxwell_3d.h" | ||
| 43 | #include "video_core/memory_manager.h" | ||
| 44 | #include "video_core/surface.h" | ||
| 45 | #include "video_core/texture_cache/decode_bc4.h" | ||
| 46 | #include "video_core/texture_cache/format_lookup_table.h" | ||
| 47 | #include "video_core/texture_cache/formatter.h" | ||
| 48 | #include "video_core/texture_cache/samples_helper.h" | ||
| 49 | #include "video_core/texture_cache/util.h" | ||
| 50 | #include "video_core/textures/astc.h" | ||
| 51 | #include "video_core/textures/decoders.h" | ||
| 52 | |||
| 53 | namespace VideoCommon { | ||
| 54 | |||
| 55 | namespace { | ||
| 56 | |||
| 57 | using Tegra::Texture::GOB_SIZE; | ||
| 58 | using Tegra::Texture::GOB_SIZE_SHIFT; | ||
| 59 | using Tegra::Texture::GOB_SIZE_X; | ||
| 60 | using Tegra::Texture::GOB_SIZE_X_SHIFT; | ||
| 61 | using Tegra::Texture::GOB_SIZE_Y; | ||
| 62 | using Tegra::Texture::GOB_SIZE_Y_SHIFT; | ||
| 63 | using Tegra::Texture::GOB_SIZE_Z; | ||
| 64 | using Tegra::Texture::GOB_SIZE_Z_SHIFT; | ||
| 65 | using Tegra::Texture::MsaaMode; | ||
| 66 | using Tegra::Texture::SwizzleTexture; | ||
| 67 | using Tegra::Texture::TextureFormat; | ||
| 68 | using Tegra::Texture::TextureType; | ||
| 69 | using Tegra::Texture::TICEntry; | ||
| 70 | using Tegra::Texture::UnswizzleTexture; | ||
| 71 | using VideoCore::Surface::BytesPerBlock; | ||
| 72 | using VideoCore::Surface::DefaultBlockHeight; | ||
| 73 | using VideoCore::Surface::DefaultBlockWidth; | ||
| 74 | using VideoCore::Surface::IsCopyCompatible; | ||
| 75 | using VideoCore::Surface::IsPixelFormatASTC; | ||
| 76 | using VideoCore::Surface::IsViewCompatible; | ||
| 77 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 78 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 79 | using VideoCore::Surface::SurfaceType; | ||
| 80 | |||
| 81 | constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); | ||
| 82 | |||
| 83 | struct LevelInfo { | ||
| 84 | Extent3D size; | ||
| 85 | Extent3D block; | ||
| 86 | Extent2D tile_size; | ||
| 87 | u32 bpp_log2; | ||
| 88 | u32 tile_width_spacing; | ||
| 89 | }; | ||
| 90 | |||
| 91 | [[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) { | ||
| 92 | if (shift == 0) { | ||
| 93 | return 0; | ||
| 94 | } | ||
| 95 | u32 x = unit_factor << (shift - 1); | ||
| 96 | if (x >= dimension) { | ||
| 97 | while (--shift) { | ||
| 98 | x >>= 1; | ||
| 99 | if (x < dimension) { | ||
| 100 | break; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | } | ||
| 104 | return shift; | ||
| 105 | } | ||
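A worked instance of the shrink loop, with illustrative values: a block-height shift of 4 starts at 8 << 3 = 64 rows, far taller than a 16-row dimension, and halves until 8 < 16, so the shift collapses to 1 (a 2-GOB block):

    static_assert(AdjustTileSize(4, GOB_SIZE_Y, 16) == 1);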
| 106 | |||
| 107 | [[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) { | ||
| 108 | return std::max<u32>(size >> level, 1); | ||
| 109 | } | ||
| 110 | |||
| 111 | [[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) { | ||
| 112 | return Extent3D{ | ||
| 113 | .width = AdjustMipSize(size.width, level), | ||
| 114 | .height = AdjustMipSize(size.height, level), | ||
| 115 | .depth = AdjustMipSize(size.depth, level), | ||
| 116 | }; | ||
| 117 | } | ||
| 118 | |||
| 119 | [[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples) { | ||
| 120 | const auto [samples_x, samples_y] = SamplesLog2(num_samples); | ||
| 121 | return Extent3D{ | ||
| 122 | .width = size.width >> samples_x, | ||
| 123 | .height = size.height >> samples_y, | ||
| 124 | .depth = size.depth, | ||
| 125 | }; | ||
| 126 | } | ||
| 127 | |||
| 128 | template <u32 GOB_EXTENT> | ||
| 129 | [[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) { | ||
| 130 | do { | ||
| 131 | while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) { | ||
| 132 | --block_size; | ||
| 133 | } | ||
| 134 | } while (level--); | ||
| 135 | return block_size; | ||
| 136 | } | ||
| 137 | |||
| 138 | [[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size, | ||
| 139 | u32 level) { | ||
| 140 | return { | ||
| 141 | .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level), | ||
| 142 | .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level), | ||
| 143 | .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level), | ||
| 144 | }; | ||
| 145 | } | ||
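The same shrinking applies per mip level here; a worked instance with illustrative numbers: 16 block-rows against a 16-GOB-high block (shift 4) at level 0 shrink the shift until 2 GOBs, i.e. exactly 16 rows, still cover the dimension:

    static_assert(AdjustMipBlockSize<GOB_SIZE_Y>(16, 4, 0) == 1);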
| 146 | |||
| 147 | [[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) { | ||
| 148 | return { | ||
| 149 | .width = Common::DivCeil(size.width, tile_size.width), | ||
| 150 | .height = Common::DivCeil(size.height, tile_size.height), | ||
| 151 | .depth = size.depth, | ||
| 152 | }; | ||
| 153 | } | ||
| 154 | |||
| 155 | [[nodiscard]] constexpr u32 BytesPerBlockLog2(u32 bytes_per_block) { | ||
| 156 | return std::countl_zero(bytes_per_block) ^ 0x1F; | ||
| 157 | } | ||
| 158 | |||
| 159 | [[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) { | ||
| 160 | return BytesPerBlockLog2(BytesPerBlock(format)); | ||
| 161 | } | ||
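For a 32-bit value, std::countl_zero(x) ^ 0x1F equals 31 - countl_zero(x), i.e. the base-2 log of the power-of-two block sizes this is called with. Two spot checks of the u32 overload:

    static_assert(BytesPerBlockLog2(1u) == 0);
    static_assert(BytesPerBlockLog2(16u) == 4);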
| 162 | |||
| 163 | [[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) { | ||
| 164 | const Extent3D num_blocks = AdjustTileSize(size, tile_size); | ||
| 165 | return num_blocks.width * num_blocks.height * num_blocks.depth; | ||
| 166 | } | ||
| 167 | |||
| 168 | [[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) { | ||
| 169 | return Common::DivCeil(AdjustMipSize(size, level), block_size); | ||
| 170 | } | ||
| 171 | |||
| 172 | [[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) { | ||
| 173 | return config.Width() * config.Height() * BytesPerBlock(format); | ||
| 174 | } | ||
| 175 | |||
| 176 | [[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) { | ||
| 177 | switch (type) { | ||
| 178 | case TextureType::Texture2D: | ||
| 179 | case TextureType::Texture2DArray: | ||
| 180 | case TextureType::Texture2DNoMipmap: | ||
| 181 | case TextureType::Texture3D: | ||
| 182 | case TextureType::TextureCubeArray: | ||
| 183 | case TextureType::TextureCubemap: | ||
| 184 | return true; | ||
| 185 | case TextureType::Texture1D: | ||
| 186 | case TextureType::Texture1DArray: | ||
| 187 | case TextureType::Texture1DBuffer: | ||
| 188 | return false; | ||
| 189 | } | ||
| 190 | return false; | ||
| 191 | } | ||
| 192 | |||
| 193 | [[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) { | ||
| 194 | switch (type) { | ||
| 195 | case ImageType::e2D: | ||
| 196 | case ImageType::e3D: | ||
| 197 | case ImageType::Linear: | ||
| 198 | return true; | ||
| 199 | case ImageType::e1D: | ||
| 200 | case ImageType::Buffer: | ||
| 201 | return false; | ||
| 202 | } | ||
| 203 | UNREACHABLE_MSG("Invalid image type={}", static_cast<int>(type)); | ||
| | return false; | ||
| 204 | } | ||
| 205 | |||
| 206 | [[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) { | ||
| 207 | switch (num_samples) { | ||
| 208 | case 1: | ||
| 209 | return {1, 1}; | ||
| 210 | case 2: | ||
| 211 | return {2, 1}; | ||
| 212 | case 4: | ||
| 213 | return {2, 2}; | ||
| 214 | case 8: | ||
| 215 | return {4, 2}; | ||
| 216 | case 16: | ||
| 217 | return {4, 4}; | ||
| 218 | } | ||
| 219 | UNREACHABLE_MSG("Invalid number of samples={}", num_samples); | ||
| 220 | return {1, 1}; | ||
| 221 | } | ||
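Sample layouts widen before they deepen (2x is 2x1, 8x is 4x2), so a 4-sample 64x64 image occupies the texel footprint of a 128x128 one, which is how MakeLevelInfo scales sizes up and AdjustSamplesSize scales them back down. A spot check, assuming <utility> is available for std::pair:

    static_assert(Samples(4) == std::pair<int, int>(2, 2));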
| 222 | |||
| 223 | [[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) { | ||
| 224 | return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; | ||
| 225 | } | ||
| 226 | |||
| 227 | [[nodiscard]] constexpr Extent3D NumLevelBlocks(const LevelInfo& info, u32 level) { | ||
| 228 | return Extent3D{ | ||
| 229 | .width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2, | ||
| 230 | .height = AdjustSize(info.size.height, level, info.tile_size.height), | ||
| 231 | .depth = AdjustMipSize(info.size.depth, level), | ||
| 232 | }; | ||
| 233 | } | ||
| 234 | |||
| 235 | [[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) { | ||
| 236 | const Extent3D blocks = NumLevelBlocks(info, level); | ||
| 237 | return Extent3D{ | ||
| 238 | .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width), | ||
| 239 | .height = AdjustTileSize(info.block.height, GOB_SIZE_Y, blocks.height), | ||
| 240 | .depth = AdjustTileSize(info.block.depth, GOB_SIZE_Z, blocks.depth), | ||
| 241 | }; | ||
| 242 | } | ||
| 243 | |||
| 244 | [[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) { | ||
| 245 | return Extent2D{ | ||
| 246 | .width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing, | ||
| 247 | .height = GOB_SIZE_Y_SHIFT + block_height, | ||
| 248 | }; | ||
| 249 | } | ||
| 250 | |||
| 251 | [[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob, | ||
| 252 | u32 block_depth) { | ||
| 253 | return num_tiles.width <= (1U << gob.width) || num_tiles.height <= (1U << gob.height) || | ||
| 254 | num_tiles.depth < (1U << block_depth); | ||
| 255 | } | ||
| 256 | |||
| 257 | [[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob, | ||
| 258 | u32 bpp_log2) { | ||
| 259 | if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) { | ||
| 260 | return GOB_SIZE_X_SHIFT - bpp_log2; | ||
| 261 | } else { | ||
| 262 | return gob.width; | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | [[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2, | ||
| 267 | u32 tile_width_spacing) { | ||
| 268 | const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing); | ||
| 269 | return StrideAlignment(num_tiles, block, gob, bpp_log2); | ||
| 270 | } | ||
| 271 | |||
| 272 | [[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) { | ||
| 273 | const Extent3D blocks = NumLevelBlocks(info, level); | ||
| 274 | const Extent2D gobs{ | ||
| 275 | .width = Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT), | ||
| 276 | .height = Common::DivCeilLog2(blocks.height, GOB_SIZE_Y_SHIFT), | ||
| 277 | }; | ||
| 278 | const Extent2D gob = GobSize(info.bpp_log2, info.block.height, info.tile_width_spacing); | ||
| 279 | const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth); | ||
| 280 | const u32 alignment = is_small ? 0 : info.tile_width_spacing; | ||
| 281 | return Extent2D{ | ||
| 282 | .width = Common::AlignBits(gobs.width, alignment), | ||
| 283 | .height = gobs.height, | ||
| 284 | }; | ||
| 285 | } | ||
| 286 | |||
| 287 | [[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) { | ||
| 288 | const Extent3D blocks = NumLevelBlocks(info, level); | ||
| 289 | const Extent3D tile_shift = TileShift(info, level); | ||
| 290 | const Extent2D gobs = NumGobs(info, level); | ||
| 291 | return Extent3D{ | ||
| 292 | .width = Common::DivCeilLog2(gobs.width, tile_shift.width), | ||
| 293 | .height = Common::DivCeilLog2(gobs.height, tile_shift.height), | ||
| 294 | .depth = Common::DivCeilLog2(blocks.depth, tile_shift.depth), | ||
| 295 | }; | ||
| 296 | } | ||
| 297 | |||
| 298 | [[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) { | ||
| 299 | const Extent3D tile_shift = TileShift(info, level); | ||
| 300 | const Extent3D tiles = LevelTiles(info, level); | ||
| 301 | const u32 num_tiles = tiles.width * tiles.height * tiles.depth; | ||
| 302 | const u32 shift = GOB_SIZE_SHIFT + tile_shift.width + tile_shift.height + tile_shift.depth; | ||
| 303 | return num_tiles << shift; | ||
| 304 | } | ||
| 305 | |||
| 306 | [[nodiscard]] constexpr std::array<u32, MAX_MIP_LEVELS> CalculateLevelSizes(const LevelInfo& info, | ||
| 307 | u32 num_levels) { | ||
| 308 | ASSERT(num_levels <= MAX_MIP_LEVELS); | ||
| 309 | std::array<u32, MAX_MIP_LEVELS> sizes{}; | ||
| 310 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 311 | sizes[level] = CalculateLevelSize(info, level); | ||
| 312 | } | ||
| 313 | return sizes; | ||
| 314 | } | ||
| 315 | |||
| 316 | [[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block, | ||
| 317 | u32 num_samples, u32 tile_width_spacing) { | ||
| 318 | const auto [samples_x, samples_y] = Samples(num_samples); | ||
| 319 | const u32 bytes_per_block = BytesPerBlock(format); | ||
| 320 | return { | ||
| 321 | .size = | ||
| 322 | { | ||
| 323 | .width = size.width * samples_x, | ||
| 324 | .height = size.height * samples_y, | ||
| 325 | .depth = size.depth, | ||
| 326 | }, | ||
| 327 | .block = block, | ||
| 328 | .tile_size = DefaultBlockSize(format), | ||
| 329 | .bpp_log2 = BytesPerBlockLog2(bytes_per_block), | ||
| 330 | .tile_width_spacing = tile_width_spacing, | ||
| 331 | }; | ||
| 332 | } | ||
| 333 | |||
| 334 | [[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) { | ||
| 335 | return MakeLevelInfo(info.format, info.size, info.block, info.num_samples, | ||
| 336 | info.tile_width_spacing); | ||
| 337 | } | ||
| 338 | |||
| 339 | [[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block, | ||
| 340 | u32 num_samples, u32 tile_width_spacing, | ||
| 341 | u32 level) { | ||
| 342 | const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing); | ||
| 343 | u32 offset = 0; | ||
| 344 | for (u32 current_level = 0; current_level < level; ++current_level) { | ||
| 345 | offset += CalculateLevelSize(info, current_level); | ||
| 346 | } | ||
| 347 | return offset; | ||
| 348 | } | ||
| 349 | |||
| 350 | [[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block, | ||
| 351 | u32 tile_size_y, u32 tile_width_spacing) { | ||
| 352 | // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134 | ||
| 353 | if (tile_width_spacing > 0) { | ||
| 354 | const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth; | ||
| 355 | return Common::AlignBits(size_bytes, alignment_log2); | ||
| 356 | } | ||
| 357 | const u32 aligned_height = Common::AlignUp(size.height, tile_size_y); | ||
| 358 | while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) { | ||
| 359 | --block.height; | ||
| 360 | } | ||
| 361 | while (block.depth != 0 && size.depth <= (1U << (block.depth - 1))) { | ||
| 362 | --block.depth; | ||
| 363 | } | ||
| 364 | const u32 block_shift = GOB_SIZE_SHIFT + block.height + block.depth; | ||
| 365 | const u32 num_blocks = size_bytes >> block_shift; | ||
| 366 | if (size_bytes != num_blocks << block_shift) { | ||
| 367 | return (num_blocks + 1) << block_shift; | ||
| 368 | } | ||
| 369 | return size_bytes; | ||
| 370 | } | ||
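A worked instance of the non-spaced path, with illustrative values: a 64x64 layer of 0x4A00 bytes with a block two GOBs high (height shift 1, depth 0) gives block_shift = 9 + 1 = 10, so the size is rounded up to the next 0x400 boundary:

    static_assert(AlignLayerSize(0x4A00, {64, 64, 1}, {0, 1, 0}, 1, 0) == 0x4C00);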
| 371 | |||
| 372 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapEqualAddress(const ImageInfo& new_info, | ||
| 373 | const ImageBase& overlap, | ||
| 374 | bool strict_size) { | ||
| 375 | const ImageInfo& info = overlap.info; | ||
| 376 | if (!IsBlockLinearSizeCompatible(new_info, info, 0, 0, strict_size)) { | ||
| 377 | return std::nullopt; | ||
| 378 | } | ||
| 379 | if (new_info.block != info.block) { | ||
| 380 | return std::nullopt; | ||
| 381 | } | ||
| 382 | const SubresourceExtent resources = new_info.resources; | ||
| 383 | return SubresourceExtent{ | ||
| 384 | .levels = std::max(resources.levels, info.resources.levels), | ||
| 385 | .layers = std::max(resources.layers, info.resources.layers), | ||
| 386 | }; | ||
| 387 | } | ||
| 388 | |||
| 389 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( | ||
| 390 | const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { | ||
| 391 | const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); | ||
| 392 | const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); | ||
| 393 | const auto it = std::ranges::find(slice_offsets, diff); | ||
| 394 | if (it == slice_offsets.end()) { | ||
| 395 | return std::nullopt; | ||
| 396 | } | ||
| 397 | const std::vector subresources = CalculateSliceSubresources(new_info); | ||
| 398 | const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; | ||
| 399 | const ImageInfo& info = overlap.info; | ||
| 400 | if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { | ||
| 401 | return std::nullopt; | ||
| 402 | } | ||
| 403 | const u32 mip_depth = std::max(1U, new_info.size.depth >> base.level); | ||
| 404 | if (mip_depth < info.size.depth + base.layer) { | ||
| 405 | return std::nullopt; | ||
| 406 | } | ||
| 407 | if (MipBlockSize(new_info, base.level) != info.block) { | ||
| 408 | return std::nullopt; | ||
| 409 | } | ||
| 410 | return SubresourceExtent{ | ||
| 411 | .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), | ||
| 412 | .layers = 1, | ||
| 413 | }; | ||
| 414 | } | ||
| 415 | |||
| 416 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D( | ||
| 417 | const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { | ||
| 418 | const u32 layer_stride = new_info.layer_stride; | ||
| 419 | const s32 new_size = layer_stride * new_info.resources.layers; | ||
| 420 | const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr); | ||
| 421 | if (diff > new_size) { | ||
| 422 | return std::nullopt; | ||
| 423 | } | ||
| 424 | const s32 base_layer = diff / layer_stride; | ||
| 425 | const s32 mip_offset = diff % layer_stride; | ||
| 426 | const std::array offsets = CalculateMipLevelOffsets(new_info); | ||
| 427 | const auto end = offsets.begin() + new_info.resources.levels; | ||
| 428 | const auto it = std::find(offsets.begin(), end, mip_offset); | ||
| 429 | if (it == end) { | ||
| 430 | // Mipmap is not aligned to any valid size | ||
| 431 | return std::nullopt; | ||
| 432 | } | ||
| 433 | const SubresourceBase base{ | ||
| 434 | .level = static_cast<s32>(std::distance(offsets.begin(), it)), | ||
| 435 | .layer = base_layer, | ||
| 436 | }; | ||
| 437 | const ImageInfo& info = overlap.info; | ||
| 438 | if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { | ||
| 439 | return std::nullopt; | ||
| 440 | } | ||
| 441 | if (MipBlockSize(new_info, base.level) != info.block) { | ||
| 442 | return std::nullopt; | ||
| 443 | } | ||
| 444 | return SubresourceExtent{ | ||
| 445 | .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), | ||
| 446 | .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer), | ||
| 447 | }; | ||
| 448 | } | ||
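An illustrative resolution with made-up numbers: layer_stride == 0x20000 and mip offsets {0, 0x10000, 0x14000, ...}. An overlap at diff == 0x34000 splits into a layer index and a mip offset, and the mip offset matches level 2, so the existing image maps onto layer 1, level 2 of the new one:

    // diff       = 0x34000
    // base_layer = 0x34000 / 0x20000 = 1
    // mip_offset = 0x34000 % 0x20000 = 0x14000  ->  offsets[2], level 2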
| 449 | |||
| 450 | [[nodiscard]] std::optional<OverlapResult> ResolveOverlapRightAddress(const ImageInfo& new_info, | ||
| 451 | GPUVAddr gpu_addr, | ||
| 452 | VAddr cpu_addr, | ||
| 453 | const ImageBase& overlap, | ||
| 454 | bool strict_size) { | ||
| 455 | std::optional<SubresourceExtent> resources; | ||
| 456 | if (new_info.type != ImageType::e3D) { | ||
| 457 | resources = ResolveOverlapRightAddress2D(new_info, gpu_addr, overlap, strict_size); | ||
| 458 | } else { | ||
| 459 | resources = ResolveOverlapRightAddress3D(new_info, gpu_addr, overlap, strict_size); | ||
| 460 | } | ||
| 461 | if (!resources) { | ||
| 462 | return std::nullopt; | ||
| 463 | } | ||
| 464 | return OverlapResult{ | ||
| 465 | .gpu_addr = gpu_addr, | ||
| 466 | .cpu_addr = cpu_addr, | ||
| 467 | .resources = *resources, | ||
| 468 | }; | ||
| 469 | } | ||
| 470 | |||
| 471 | [[nodiscard]] std::optional<OverlapResult> ResolveOverlapLeftAddress(const ImageInfo& new_info, | ||
| 472 | GPUVAddr gpu_addr, | ||
| 473 | VAddr cpu_addr, | ||
| 474 | const ImageBase& overlap, | ||
| 475 | bool strict_size) { | ||
| 476 | const std::optional<SubresourceBase> base = overlap.TryFindBase(gpu_addr); | ||
| 477 | if (!base) { | ||
| 478 | return std::nullopt; | ||
| 479 | } | ||
| 480 | const ImageInfo& info = overlap.info; | ||
| 481 | if (!IsBlockLinearSizeCompatible(new_info, info, base->level, 0, strict_size)) { | ||
| 482 | return std::nullopt; | ||
| 483 | } | ||
| 484 | if (new_info.block != MipBlockSize(info, base->level)) { | ||
| 485 | return std::nullopt; | ||
| 486 | } | ||
| 487 | const SubresourceExtent resources = new_info.resources; | ||
| 488 | s32 layers = 1; | ||
| 489 | if (info.type != ImageType::e3D) { | ||
| 490 | layers = std::max(resources.layers, info.resources.layers + base->layer); | ||
| 491 | } | ||
| 492 | return OverlapResult{ | ||
| 493 | .gpu_addr = overlap.gpu_addr, | ||
| 494 | .cpu_addr = overlap.cpu_addr, | ||
| 495 | .resources = | ||
| 496 | { | ||
| 497 | .levels = std::max(resources.levels + base->level, info.resources.levels), | ||
| 498 | .layers = layers, | ||
| 499 | }, | ||
| 500 | }; | ||
| 501 | } | ||
| 502 | |||
| 503 | [[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info) { | ||
| 504 | // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L212 | ||
| 505 | static constexpr u32 STRIDE_ALIGNMENT = 32; | ||
| 506 | ASSERT(info.type == ImageType::Linear); | ||
| 507 | const Extent2D num_tiles{ | ||
| 508 | .width = Common::DivCeil(info.size.width, DefaultBlockWidth(info.format)), | ||
| 509 | .height = Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)), | ||
| 510 | }; | ||
| 511 | const u32 width_alignment = STRIDE_ALIGNMENT / BytesPerBlock(info.format); | ||
| 512 | return Extent2D{ | ||
| 513 | .width = Common::AlignUp(num_tiles.width, width_alignment), | ||
| 514 | .height = num_tiles.height, | ||
| 515 | }; | ||
| 516 | } | ||
| 517 | |||
| 518 | [[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level) { | ||
| 519 | // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L176 | ||
| 520 | ASSERT(info.type != ImageType::Linear); | ||
| 521 | const Extent3D size = AdjustMipSize(info.size, level); | ||
| 522 | const Extent3D num_tiles{ | ||
| 523 | .width = Common::DivCeil(size.width, DefaultBlockWidth(info.format)), | ||
| 524 | .height = Common::DivCeil(size.height, DefaultBlockHeight(info.format)), | ||
| 525 | .depth = size.depth, | ||
| 526 | }; | ||
| 527 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); | ||
| 528 | const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing); | ||
| 529 | const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0); | ||
| 530 | return Extent3D{ | ||
| 531 | .width = Common::AlignBits(num_tiles.width, alignment), | ||
| 532 | .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height), | ||
| 533 | .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth), | ||
| 534 | }; | ||
| 535 | } | ||
| 536 | |||
| 537 | [[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept { | ||
| 538 | u32 num_blocks = 0; | ||
| 539 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 540 | const Extent3D mip_size = AdjustMipSize(info.size, level); | ||
| 541 | num_blocks += NumBlocks(mip_size, tile_size); | ||
| 542 | } | ||
| 543 | return num_blocks; | ||
| 544 | } | ||
| 545 | |||
| 546 | [[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept { | ||
| 547 | ASSERT(info.type == ImageType::e3D); | ||
| 548 | u32 num_slices = 0; | ||
| 549 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 550 | num_slices += AdjustMipSize(info.size.depth, level); | ||
| 551 | } | ||
| 552 | return num_slices; | ||
| 553 | } | ||
| 554 | |||
| 555 | void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||
| 556 | const ImageInfo& info, const BufferImageCopy& copy, | ||
| 557 | std::span<const u8> memory) { | ||
| 558 | ASSERT(copy.image_offset.z == 0); | ||
| 559 | ASSERT(copy.image_extent.depth == 1); | ||
| 560 | ASSERT(copy.image_subresource.base_level == 0); | ||
| 561 | ASSERT(copy.image_subresource.base_layer == 0); | ||
| 562 | ASSERT(copy.image_subresource.num_layers == 1); | ||
| 563 | |||
| 564 | const u32 bytes_per_block = BytesPerBlock(info.format); | ||
| 565 | const u32 row_length = copy.image_extent.width * bytes_per_block; | ||
| 566 | const u32 guest_offset_x = copy.image_offset.x * bytes_per_block; | ||
| 567 | |||
| 568 | for (u32 line = 0; line < copy.image_extent.height; ++line) { | ||
| 569 | const u32 host_offset_y = line * info.pitch; | ||
| 570 | const u32 guest_offset_y = (copy.image_offset.y + line) * info.pitch; | ||
| 571 | const u32 guest_offset = guest_offset_x + guest_offset_y; | ||
| 572 | gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, memory.data() + host_offset_y, | ||
| 573 | row_length); | ||
| 574 | } | ||
| 575 | } | ||
| 576 | |||
| 577 | void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||
| 578 | const ImageInfo& info, const BufferImageCopy& copy, | ||
| 579 | std::span<const u8> input) { | ||
| 580 | const Extent3D size = info.size; | ||
| 581 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 582 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 583 | const u32 bytes_per_block = BytesPerBlock(info.format); | ||
| 584 | |||
| 585 | const s32 level = copy.image_subresource.base_level; | ||
| 586 | const Extent3D level_size = AdjustMipSize(size, level); | ||
| 587 | const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); | ||
| 588 | const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block; | ||
| 589 | |||
| 590 | UNIMPLEMENTED_IF(info.tile_width_spacing > 0); | ||
| 591 | |||
| 592 | UNIMPLEMENTED_IF(copy.image_offset.x != 0); | ||
| 593 | UNIMPLEMENTED_IF(copy.image_offset.y != 0); | ||
| 594 | UNIMPLEMENTED_IF(copy.image_offset.z != 0); | ||
| 595 | UNIMPLEMENTED_IF(copy.image_extent != level_size); | ||
| 596 | |||
| 597 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | ||
| 598 | const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); | ||
| 599 | |||
| 600 | size_t host_offset = copy.buffer_offset; | ||
| 601 | |||
| 602 | const u32 num_levels = info.resources.levels; | ||
| 603 | const std::array sizes = CalculateLevelSizes(level_info, num_levels); | ||
| 604 | size_t guest_offset = std::reduce(sizes.begin(), sizes.begin() + level, 0); | ||
| 605 | const size_t layer_stride = | ||
| 606 | AlignLayerSize(std::reduce(sizes.begin(), sizes.begin() + num_levels, 0), size, | ||
| 607 | level_info.block, tile_size.height, info.tile_width_spacing); | ||
| 608 | const size_t subresource_size = sizes[level]; | ||
| 609 | |||
| 610 | const auto dst_data = std::make_unique<u8[]>(subresource_size); | ||
| 611 | const std::span<u8> dst(dst_data.get(), subresource_size); | ||
| 612 | |||
| 613 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { | ||
| 614 | const std::span<const u8> src = input.subspan(host_offset); | ||
| 615 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, | ||
| 616 | num_tiles.depth, block.height, block.depth); | ||
| 617 | |||
| 618 | gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); | ||
| 619 | |||
| 620 | host_offset += host_bytes_per_layer; | ||
| 621 | guest_offset += layer_stride; | ||
| 622 | } | ||
| 623 | ASSERT(host_offset - copy.buffer_offset == copy.buffer_size); | ||
| 624 | } | ||
| 625 | |||
| 626 | } // Anonymous namespace | ||
| 627 | |||
| 628 | u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept { | ||
| 629 | if (info.type == ImageType::Buffer) { | ||
| 630 | return info.size.width * BytesPerBlock(info.format); | ||
| 631 | } | ||
| 632 | if (info.type == ImageType::Linear) { | ||
| 633 | return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); | ||
| 634 | } | ||
| 635 | if (info.resources.layers > 1) { | ||
| 636 | ASSERT(info.layer_stride != 0); | ||
| 637 | return info.layer_stride * info.resources.layers; | ||
| 638 | } else { | ||
| 639 | return CalculateLayerSize(info); | ||
| 640 | } | ||
| 641 | } | ||
| 642 | |||
| 643 | u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept { | ||
| 644 | if (info.type == ImageType::Buffer) { | ||
| 645 | return info.size.width * BytesPerBlock(info.format); | ||
| 646 | } | ||
| 647 | if (info.num_samples > 1) { | ||
| 648 | // Multisample images can't be uploaded or downloaded to the host | ||
| 649 | return 0; | ||
| 650 | } | ||
| 651 | if (info.type == ImageType::Linear) { | ||
| 652 | return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); | ||
| 653 | } | ||
| 654 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 655 | return NumBlocksPerLayer(info, tile_size) * info.resources.layers * BytesPerBlock(info.format); | ||
| 656 | } | ||
| 657 | |||
| 658 | u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept { | ||
| 659 | if (info.type == ImageType::Buffer) { | ||
| 660 | return info.size.width * BytesPerBlock(info.format); | ||
| 661 | } | ||
| 662 | static constexpr Extent2D TILE_SIZE{1, 1}; | ||
| 663 | return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; | ||
| 664 | } | ||
| 665 | |||
| 666 | u32 CalculateLayerStride(const ImageInfo& info) noexcept { | ||
| 667 | ASSERT(info.type != ImageType::Linear); | ||
| 668 | const u32 layer_size = CalculateLayerSize(info); | ||
| 669 | const Extent3D size = info.size; | ||
| 670 | const Extent3D block = info.block; | ||
| 671 | const u32 tile_size_y = DefaultBlockHeight(info.format); | ||
| 672 | return AlignLayerSize(layer_size, size, block, tile_size_y, info.tile_width_spacing); | ||
| 673 | } | ||
| 674 | |||
| 675 | u32 CalculateLayerSize(const ImageInfo& info) noexcept { | ||
| 676 | ASSERT(info.type != ImageType::Linear); | ||
| 677 | return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples, | ||
| 678 | info.tile_width_spacing, info.resources.levels); | ||
| 679 | } | ||
| 680 | |||
| 681 | std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(const ImageInfo& info) noexcept { | ||
| 682 | ASSERT(info.resources.levels <= MAX_MIP_LEVELS); | ||
| 683 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 684 | std::array<u32, MAX_MIP_LEVELS> offsets{}; | ||
| 685 | u32 offset = 0; | ||
| 686 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 687 | offsets[level] = offset; | ||
| 688 | offset += CalculateLevelSize(level_info, level); | ||
| 689 | } | ||
| 690 | return offsets; | ||
| 691 | } | ||
| 692 | |||
| 693 | std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { | ||
| 694 | ASSERT(info.type == ImageType::e3D); | ||
| 695 | std::vector<u32> offsets; | ||
| 696 | offsets.reserve(NumSlices(info)); | ||
| 697 | |||
| 698 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 699 | u32 mip_offset = 0; | ||
| 700 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 701 | const Extent3D tile_shift = TileShift(level_info, level); | ||
| 702 | const Extent3D tiles = LevelTiles(level_info, level); | ||
| 703 | const u32 gob_size_shift = tile_shift.height + GOB_SIZE_SHIFT; | ||
| 704 | const u32 slice_size = (tiles.width * tiles.height) << gob_size_shift; | ||
| 705 | const u32 z_mask = (1U << tile_shift.depth) - 1; | ||
| 706 | const u32 depth = AdjustMipSize(info.size.depth, level); | ||
| 707 | for (u32 slice = 0; slice < depth; ++slice) { | ||
| 708 | const u32 z_low = slice & z_mask; | ||
| 709 | const u32 z_high = slice & ~z_mask; | ||
| 710 | offsets.push_back(mip_offset + (z_low << gob_size_shift) + (z_high * slice_size)); | ||
| 711 | } | ||
| 712 | mip_offset += CalculateLevelSize(level_info, level); | ||
| 713 | } | ||
| 714 | return offsets; | ||
| 715 | } | ||
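A standalone sketch of the per-slice arithmetic above; u32 is the alias from common_types, and the parameter names mirror the per-level values computed in the loop:

    constexpr u32 SliceOffset(u32 mip_offset, u32 slice, u32 tile_shift_depth,
                              u32 gob_size_shift, u32 slice_size) {
        const u32 z_mask = (1U << tile_shift_depth) - 1;
        const u32 z_low = slice & z_mask;   // position inside the depth block
        const u32 z_high = slice & ~z_mask; // first slice of the depth block
        return mip_offset + (z_low << gob_size_shift) + z_high * slice_size;
    }
    // With a block 4 slices deep (tile_shift_depth == 2), slice 6 splits
    // into z_low == 2 and z_high == 4:
    static_assert(SliceOffset(0, 6, 2, 11, 0x1000) == (2u << 11) + 4 * 0x1000);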
| 716 | |||
| 717 | std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { | ||
| 718 | ASSERT(info.type == ImageType::e3D); | ||
| 719 | std::vector<SubresourceBase> subresources; | ||
| 720 | subresources.reserve(NumSlices(info)); | ||
| 721 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 722 | const s32 depth = AdjustMipSize(info.size.depth, level); | ||
| 723 | for (s32 slice = 0; slice < depth; ++slice) { | ||
| 724 | subresources.emplace_back(SubresourceBase{ | ||
| 725 | .level = level, | ||
| 726 | .layer = slice, | ||
| 727 | }); | ||
| 728 | } | ||
| 729 | } | ||
| 730 | return subresources; | ||
| 731 | } | ||
| 732 | |||
| 733 | u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) { | ||
| 734 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 735 | const Extent3D level_size = AdjustMipSize(info.size, level); | ||
| 736 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | ||
| 737 | const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level); | ||
| 738 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); | ||
| 739 | return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing); | ||
| 740 | } | ||
| 741 | |||
| 742 | PixelFormat PixelFormatFromTIC(const TICEntry& config) noexcept { | ||
| 743 | return PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, | ||
| 744 | config.a_type, config.srgb_conversion); | ||
| 745 | } | ||
| 746 | |||
| 747 | ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { | ||
| 748 | switch (info.type) { | ||
| 749 | case ImageType::e2D: | ||
| 750 | return info.resources.layers > 1 ? ImageViewType::e2DArray : ImageViewType::e2D; | ||
| 751 | case ImageType::e3D: | ||
| 752 | return ImageViewType::e2DArray; | ||
| 753 | case ImageType::Linear: | ||
| 754 | return ImageViewType::e2D; | ||
| 755 | default: | ||
| 756 | UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(info.type)); | ||
| 757 | return ImageViewType{}; | ||
| 758 | } | ||
| 759 | } | ||
| 760 | |||
| 761 | std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, | ||
| 762 | SubresourceBase base) { | ||
| 763 | ASSERT(dst.resources.levels >= src.resources.levels); | ||
| 764 | ASSERT(dst.num_samples == src.num_samples); | ||
| 765 | |||
| 766 | const bool is_dst_3d = dst.type == ImageType::e3D; | ||
| 767 | if (is_dst_3d) { | ||
| 768 | ASSERT(src.type == ImageType::e3D); | ||
| 769 | ASSERT(src.resources.levels == 1); | ||
| 770 | } | ||
| 771 | |||
| 772 | std::vector<ImageCopy> copies; | ||
| 773 | copies.reserve(src.resources.levels); | ||
| 774 | for (s32 level = 0; level < src.resources.levels; ++level) { | ||
| 775 | ImageCopy& copy = copies.emplace_back(); | ||
| 776 | copy.src_subresource = SubresourceLayers{ | ||
| 777 | .base_level = level, | ||
| 778 | .base_layer = 0, | ||
| 779 | .num_layers = src.resources.layers, | ||
| 780 | }; | ||
| 781 | copy.dst_subresource = SubresourceLayers{ | ||
| 782 | .base_level = base.level + level, | ||
| 783 | .base_layer = is_dst_3d ? 0 : base.layer, | ||
| 784 | .num_layers = is_dst_3d ? 1 : src.resources.layers, | ||
| 785 | }; | ||
| 786 | copy.src_offset = Offset3D{ | ||
| 787 | .x = 0, | ||
| 788 | .y = 0, | ||
| 789 | .z = 0, | ||
| 790 | }; | ||
| 791 | copy.dst_offset = Offset3D{ | ||
| 792 | .x = 0, | ||
| 793 | .y = 0, | ||
| 794 | .z = is_dst_3d ? base.layer : 0, | ||
| 795 | }; | ||
| 796 | const Extent3D mip_size = AdjustMipSize(dst.size, base.level + level); | ||
| 797 | copy.extent = AdjustSamplesSize(mip_size, dst.num_samples); | ||
| 798 | if (is_dst_3d) { | ||
| 799 | copy.extent.depth = src.size.depth; | ||
| 800 | } | ||
| 801 | } | ||
| 802 | return copies; | ||
| 803 | } | ||
| 804 | |||
| 805 | bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { | ||
| 806 | if (config.Address() == 0) { | ||
| 807 | return false; | ||
| 808 | } | ||
| 809 | if (config.Address() > (u64(1) << 48)) { | ||
| 810 | return false; | ||
| 811 | } | ||
| 812 | return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); | ||
| 813 | } | ||
| 814 | |||
| 815 | std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||
| 816 | const ImageInfo& info, std::span<u8> output) { | ||
| 817 | const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); | ||
| 818 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); | ||
| 819 | const Extent3D size = info.size; | ||
| 820 | |||
| 821 | if (info.type == ImageType::Linear) { | ||
| 822 | gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); | ||
| 823 | |||
| 824 | ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); | ||
| 825 | return {{ | ||
| 826 | .buffer_offset = 0, | ||
| 827 | .buffer_size = guest_size_bytes, | ||
| 828 | .buffer_row_length = info.pitch >> bpp_log2, | ||
| 829 | .buffer_image_height = size.height, | ||
| 830 | .image_subresource = | ||
| 831 | { | ||
| 832 | .base_level = 0, | ||
| 833 | .base_layer = 0, | ||
| 834 | .num_layers = 1, | ||
| 835 | }, | ||
| 836 | .image_offset = {0, 0, 0}, | ||
| 837 | .image_extent = size, | ||
| 838 | }}; | ||
| 839 | } | ||
| 840 | const auto input_data = std::make_unique<u8[]>(guest_size_bytes); | ||
| 841 | gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes); | ||
| 842 | const std::span<const u8> input(input_data.get(), guest_size_bytes); | ||
| 843 | |||
| 844 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 845 | const s32 num_layers = info.resources.layers; | ||
| 846 | const s32 num_levels = info.resources.levels; | ||
| 847 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 848 | const std::array level_sizes = CalculateLevelSizes(level_info, num_levels); | ||
| 849 | const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing); | ||
| 850 | const u32 layer_size = std::reduce(level_sizes.begin(), level_sizes.begin() + num_levels, 0); | ||
| 851 | const u32 layer_stride = AlignLayerSize(layer_size, size, level_info.block, tile_size.height, | ||
| 852 | info.tile_width_spacing); | ||
| 853 | size_t guest_offset = 0; | ||
| 854 | u32 host_offset = 0; | ||
| 855 | std::vector<BufferImageCopy> copies(num_levels); | ||
| 856 | |||
| 857 | for (s32 level = 0; level < num_levels; ++level) { | ||
| 858 | const Extent3D level_size = AdjustMipSize(size, level); | ||
| 859 | const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); | ||
| 860 | const u32 host_bytes_per_layer = num_blocks_per_layer << bpp_log2; | ||
| 861 | copies[level] = BufferImageCopy{ | ||
| 862 | .buffer_offset = host_offset, | ||
| 863 | .buffer_size = static_cast<size_t>(host_bytes_per_layer) * num_layers, | ||
| 864 | .buffer_row_length = Common::AlignUp(level_size.width, tile_size.width), | ||
| 865 | .buffer_image_height = Common::AlignUp(level_size.height, tile_size.height), | ||
| 866 | .image_subresource = | ||
| 867 | { | ||
| 868 | .base_level = level, | ||
| 869 | .base_layer = 0, | ||
| 870 | .num_layers = info.resources.layers, | ||
| 871 | }, | ||
| 872 | .image_offset = {0, 0, 0}, | ||
| 873 | .image_extent = level_size, | ||
| 874 | }; | ||
| 875 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | ||
| 876 | const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); | ||
| 877 | const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2); | ||
| 878 | size_t guest_layer_offset = 0; | ||
| 879 | |||
| 880 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { | ||
| 881 | const std::span<u8> dst = output.subspan(host_offset); | ||
| 882 | const std::span<const u8> src = input.subspan(guest_offset + guest_layer_offset); | ||
| 883 | UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height, | ||
| 884 | num_tiles.depth, block.height, block.depth, stride_alignment); | ||
| 885 | guest_layer_offset += layer_stride; | ||
| 886 | host_offset += host_bytes_per_layer; | ||
| 887 | } | ||
| 888 | guest_offset += level_sizes[level]; | ||
| 889 | } | ||
| 890 | return copies; | ||
| 891 | } | ||
| 892 | |||
| 893 | BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||
| 894 | const ImageBase& image, std::span<u8> output) { | ||
| 895 | gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes); | ||
| 896 | return BufferCopy{ | ||
| 897 | .src_offset = 0, | ||
| 898 | .dst_offset = 0, | ||
| 899 | .size = image.guest_size_bytes, | ||
| 900 | }; | ||
| 901 | } | ||
| 902 | |||
| 903 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | ||
| 904 | std::span<BufferImageCopy> copies) { | ||
| 905 | u32 output_offset = 0; | ||
| 906 | |||
| 907 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 908 | for (BufferImageCopy& copy : copies) { | ||
| 909 | const u32 level = copy.image_subresource.base_level; | ||
| 910 | const Extent3D mip_size = AdjustMipSize(info.size, level); | ||
| 911 | ASSERT(copy.image_offset == Offset3D{}); | ||
| 912 | ASSERT(copy.image_subresource.base_layer == 0); | ||
| 913 | ASSERT(copy.image_extent == mip_size); | ||
| 914 | ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); | ||
| 915 | ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); | ||
| 916 | |||
| 917 | if (IsPixelFormatASTC(info.format)) { | ||
| 918 | ASSERT(copy.image_extent.depth == 1); | ||
| 919 | Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset), | ||
| 920 | copy.image_extent.width, copy.image_extent.height, | ||
| 921 | copy.image_subresource.num_layers, tile_size.width, | ||
| 922 | tile_size.height, output.subspan(output_offset)); | ||
| 923 | } else { | ||
| 924 | DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, | ||
| 925 | output.subspan(output_offset)); | ||
| 926 | } | ||
| 927 | copy.buffer_offset = output_offset; | ||
| 928 | copy.buffer_row_length = mip_size.width; | ||
| 929 | copy.buffer_image_height = mip_size.height; | ||
| 930 | |||
| 931 | output_offset += copy.image_extent.width * copy.image_extent.height * | ||
| 932 | copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; | ||
| 933 | } | ||
| 934 | } | ||
| 935 | |||
| 936 | std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { | ||
| 937 | const Extent3D size = info.size; | ||
| 938 | const u32 bytes_per_block = BytesPerBlock(info.format); | ||
| 939 | if (info.type == ImageType::Linear) { | ||
| 940 | ASSERT(info.pitch % bytes_per_block == 0); | ||
| 941 | return {{ | ||
| 942 | .buffer_offset = 0, | ||
| 943 | .buffer_size = static_cast<size_t>(info.pitch) * size.height, | ||
| 944 | .buffer_row_length = info.pitch / bytes_per_block, | ||
| 945 | .buffer_image_height = size.height, | ||
| 946 | .image_subresource = | ||
| 947 | { | ||
| 948 | .base_level = 0, | ||
| 949 | .base_layer = 0, | ||
| 950 | .num_layers = 1, | ||
| 951 | }, | ||
| 952 | .image_offset = {0, 0, 0}, | ||
| 953 | .image_extent = size, | ||
| 954 | }}; | ||
| 955 | } | ||
| 956 | UNIMPLEMENTED_IF(info.tile_width_spacing > 0); | ||
| 957 | |||
| 958 | const s32 num_layers = info.resources.layers; | ||
| 959 | const s32 num_levels = info.resources.levels; | ||
| 960 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 961 | |||
| 962 | u32 host_offset = 0; | ||
| 963 | |||
| 964 | std::vector<BufferImageCopy> copies(num_levels); | ||
| 965 | for (s32 level = 0; level < num_levels; ++level) { | ||
| 966 | const Extent3D level_size = AdjustMipSize(size, level); | ||
| 967 | const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); | ||
| 968 | const u32 host_bytes_per_level = num_blocks_per_layer * bytes_per_block * num_layers; | ||
| 969 | copies[level] = BufferImageCopy{ | ||
| 970 | .buffer_offset = host_offset, | ||
| 971 | .buffer_size = host_bytes_per_level, | ||
| 972 | .buffer_row_length = level_size.width, | ||
| 973 | .buffer_image_height = level_size.height, | ||
| 974 | .image_subresource = | ||
| 975 | { | ||
| 976 | .base_level = level, | ||
| 977 | .base_layer = 0, | ||
| 978 | .num_layers = info.resources.layers, | ||
| 979 | }, | ||
| 980 | .image_offset = {0, 0, 0}, | ||
| 981 | .image_extent = level_size, | ||
| 982 | }; | ||
| 983 | host_offset += host_bytes_per_level; | ||
| 984 | } | ||
| 985 | return copies; | ||
| 986 | } | ||
| 987 | |||
| 988 | Extent3D MipSize(Extent3D size, u32 level) { | ||
| 989 | return AdjustMipSize(size, level); | ||
| 990 | } | ||
| 991 | |||
| 992 | Extent3D MipBlockSize(const ImageInfo& info, u32 level) { | ||
| 993 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 994 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 995 | const Extent3D level_size = AdjustMipSize(info.size, level); | ||
| 996 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | ||
| 997 | return AdjustMipBlockSize(num_tiles, level_info.block, level); | ||
| 998 | } | ||
| 999 | |||
| 1000 | std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { | ||
| 1001 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 1002 | if (info.type == ImageType::Linear) { | ||
| 1003 | return std::vector{SwizzleParameters{ | ||
| 1004 | .num_tiles = AdjustTileSize(info.size, tile_size), | ||
| 1005 | .block = {}, | ||
| 1006 | .buffer_offset = 0, | ||
| 1007 | .level = 0, | ||
| 1008 | }}; | ||
| 1009 | } | ||
| 1010 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 1011 | const Extent3D size = info.size; | ||
| 1012 | const s32 num_levels = info.resources.levels; | ||
| 1013 | |||
| 1014 | u32 guest_offset = 0; | ||
| 1015 | std::vector<SwizzleParameters> params(num_levels); | ||
| 1016 | for (s32 level = 0; level < num_levels; ++level) { | ||
| 1017 | const Extent3D level_size = AdjustMipSize(size, level); | ||
| 1018 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | ||
| 1019 | const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); | ||
| 1020 | params[level] = SwizzleParameters{ | ||
| 1021 | .num_tiles = num_tiles, | ||
| 1022 | .block = block, | ||
| 1023 | .buffer_offset = guest_offset, | ||
| 1024 | .level = level, | ||
| 1025 | }; | ||
| 1026 | guest_offset += CalculateLevelSize(level_info, level); | ||
| 1027 | } | ||
| 1028 | return params; | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, | ||
| 1032 | std::span<const BufferImageCopy> copies, std::span<const u8> memory) { | ||
| 1033 | const bool is_pitch_linear = info.type == ImageType::Linear; | ||
| 1034 | for (const BufferImageCopy& copy : copies) { | ||
| 1035 | if (is_pitch_linear) { | ||
| 1036 | SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory); | ||
| 1037 | } else { | ||
| 1038 | SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory); | ||
| 1039 | } | ||
| 1040 | } | ||
| 1041 | } | ||
| 1042 | |||
| 1043 | bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 lhs_level, | ||
| 1044 | u32 rhs_level, bool strict_size) noexcept { | ||
| 1045 | ASSERT(lhs.type != ImageType::Linear); | ||
| 1046 | ASSERT(rhs.type != ImageType::Linear); | ||
| 1047 | if (strict_size) { | ||
| 1048 | const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level); | ||
| 1049 | const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level); | ||
| 1050 | return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height; | ||
| 1051 | } else { | ||
| 1052 | const Extent3D lhs_size = BlockLinearAlignedSize(lhs, lhs_level); | ||
| 1053 | const Extent3D rhs_size = BlockLinearAlignedSize(rhs, rhs_level); | ||
| 1054 | return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height; | ||
| 1055 | } | ||
| 1056 | } | ||
| 1057 | |||
| 1058 | bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept { | ||
| 1059 | ASSERT(lhs.type == ImageType::Linear); | ||
| 1060 | ASSERT(rhs.type == ImageType::Linear); | ||
| 1061 | if (strict_size) { | ||
| 1062 | return lhs.size.width == rhs.size.width && lhs.size.height == rhs.size.height; | ||
| 1063 | } else { | ||
| 1064 | const Extent2D lhs_size = PitchLinearAlignedSize(lhs); | ||
| 1065 | const Extent2D rhs_size = PitchLinearAlignedSize(rhs); | ||
| 1066 | return lhs_size == rhs_size; | ||
| 1067 | } | ||
| 1068 | } | ||
| 1069 | |||
| 1070 | std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, | ||
| 1071 | VAddr cpu_addr, const ImageBase& overlap, | ||
| 1072 | bool strict_size) { | ||
| 1073 | ASSERT(new_info.type != ImageType::Linear); | ||
| 1074 | ASSERT(overlap.info.type != ImageType::Linear); | ||
| 1075 | if (!IsLayerStrideCompatible(new_info, overlap.info)) { | ||
| 1076 | return std::nullopt; | ||
| 1077 | } | ||
| 1078 | if (!IsViewCompatible(overlap.info.format, new_info.format)) { | ||
| 1079 | return std::nullopt; | ||
| 1080 | } | ||
| 1081 | if (gpu_addr == overlap.gpu_addr) { | ||
| 1082 | const std::optional solution = ResolveOverlapEqualAddress(new_info, overlap, strict_size); | ||
| 1083 | if (!solution) { | ||
| 1084 | return std::nullopt; | ||
| 1085 | } | ||
| 1086 | return OverlapResult{ | ||
| 1087 | .gpu_addr = gpu_addr, | ||
| 1088 | .cpu_addr = cpu_addr, | ||
| 1089 | .resources = *solution, | ||
| 1090 | }; | ||
| 1091 | } | ||
| 1092 | if (overlap.gpu_addr > gpu_addr) { | ||
| 1093 | return ResolveOverlapRightAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size); | ||
| 1094 | } | ||
| 1095 | // Otherwise, overlap.gpu_addr < gpu_addr | ||
| 1096 | return ResolveOverlapLeftAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size); | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) { | ||
| 1100 | // If either of the layer strides is zero, we can assume they are compatible. | ||
| 1101 | // Such images generally come from render targets. | ||
| 1102 | if (lhs.layer_stride == 0) { | ||
| 1103 | return true; | ||
| 1104 | } | ||
| 1105 | if (rhs.layer_stride == 0) { | ||
| 1106 | return true; | ||
| 1107 | } | ||
| 1108 | // It's definitely compatible if the layer stride matches | ||
| 1109 | if (lhs.layer_stride == rhs.layer_stride) { | ||
| 1110 | return true; | ||
| 1111 | } | ||
| 1112 | // We also have to compare the unaligned strides, as an image without layers | ||
| 1113 | // can leave its layer stride unaligned. | ||
| 1114 | if (lhs.maybe_unaligned_layer_stride == rhs.maybe_unaligned_layer_stride) { | ||
| 1115 | return true; | ||
| 1116 | } | ||
| 1117 | return false; | ||
| 1118 | } | ||
| 1119 | |||
| 1120 | std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image, | ||
| 1121 | GPUVAddr candidate_addr, RelaxedOptions options) { | ||
| 1122 | const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr); | ||
| 1123 | if (!base) { | ||
| 1124 | return std::nullopt; | ||
| 1125 | } | ||
| 1126 | const ImageInfo& existing = image.info; | ||
| 1127 | if (False(options & RelaxedOptions::Format)) { | ||
| 1128 | if (!IsViewCompatible(existing.format, candidate.format)) { | ||
| 1129 | return std::nullopt; | ||
| 1130 | } | ||
| 1131 | } | ||
| 1132 | if (!IsLayerStrideCompatible(existing, candidate)) { | ||
| 1133 | return std::nullopt; | ||
| 1134 | } | ||
| 1135 | if (existing.type != candidate.type) { | ||
| 1136 | return std::nullopt; | ||
| 1137 | } | ||
| 1138 | if (False(options & RelaxedOptions::Samples)) { | ||
| 1139 | if (existing.num_samples != candidate.num_samples) { | ||
| 1140 | return std::nullopt; | ||
| 1141 | } | ||
| 1142 | } | ||
| 1143 | if (existing.resources.levels < candidate.resources.levels + base->level) { | ||
| 1144 | return std::nullopt; | ||
| 1145 | } | ||
| 1146 | if (existing.type == ImageType::e3D) { | ||
| 1147 | const u32 mip_depth = std::max(1U, existing.size.depth >> base->level); | ||
| 1148 | if (mip_depth < candidate.size.depth + base->layer) { | ||
| 1149 | return std::nullopt; | ||
| 1150 | } | ||
| 1151 | } else { | ||
| 1152 | if (existing.resources.layers < candidate.resources.layers + base->layer) { | ||
| 1153 | return std::nullopt; | ||
| 1154 | } | ||
| 1155 | } | ||
| 1156 | const bool strict_size = False(options & RelaxedOptions::Size); | ||
| 1157 | if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) { | ||
| 1158 | return std::nullopt; | ||
| 1159 | } | ||
| 1160 | // TODO: compare block sizes | ||
| 1161 | return base; | ||
| 1162 | } | ||
| 1163 | |||
| 1164 | bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, | ||
| 1165 | RelaxedOptions options) { | ||
| 1166 | return FindSubresource(candidate, image, candidate_addr, options).has_value(); | ||
| 1167 | } | ||
| 1168 | |||
| 1169 | void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, | ||
| 1170 | const ImageBase* src) { | ||
| 1171 | if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { | ||
| 1172 | src_info.format = src->info.format; | ||
| 1173 | } | ||
| 1174 | if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { | ||
| 1175 | dst_info.format = dst->info.format; | ||
| 1176 | } | ||
| 1177 | if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { | ||
| 1178 | dst_info.format = src->info.format; | ||
| 1179 | } | ||
| 1180 | if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { | ||
| 1181 | src_info.format = dst->info.format; | ||
| 1182 | } | ||
| 1183 | } | ||
| 1184 | |||
| 1185 | u32 MapSizeBytes(const ImageBase& image) { | ||
| 1186 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | ||
| 1187 | return image.guest_size_bytes; | ||
| 1188 | } else if (True(image.flags & ImageFlagBits::Converted)) { | ||
| 1189 | return image.converted_size_bytes; | ||
| 1190 | } else { | ||
| 1191 | return image.unswizzled_size_bytes; | ||
| 1192 | } | ||
| 1193 | } | ||
| 1194 | |||
| 1195 | using P = PixelFormat; | ||
| 1196 | |||
| 1197 | static_assert(CalculateLevelSize(LevelInfo{{1920, 1080}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == 0x7f8000); | ||
| 1198 | static_assert(CalculateLevelSize(LevelInfo{{32, 32}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000); | ||
| 1199 | |||
| 1200 | static_assert(CalculateLevelOffset(P::R8_SINT, {1920, 1080}, {0, 2}, 1, 0, 7) == 0x2afc00); | ||
| 1201 | static_assert(CalculateLevelOffset(P::ASTC_2D_12X12_UNORM, {8192, 4096}, {0, 2}, 1, 0, 12) == | ||
| 1202 | 0x50d200); | ||
| 1203 | |||
| 1204 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 0) == 0); | ||
| 1205 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 1) == 0x400000); | ||
| 1206 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 2) == 0x500000); | ||
| 1207 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 3) == 0x540000); | ||
| 1208 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 4) == 0x550000); | ||
| 1209 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 5) == 0x554000); | ||
| 1210 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 6) == 0x555000); | ||
| 1211 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 7) == 0x555400); | ||
| 1212 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 8) == 0x555600); | ||
| 1213 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 9) == 0x555800); | ||
| 1214 | |||
| 1215 | constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height, | ||
| 1216 | u32 tile_width_spacing, u32 level) { | ||
| 1217 | const Extent3D size{width, height, 1}; | ||
| 1218 | const Extent3D block{0, block_height, 0}; | ||
| 1219 | const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level); | ||
| 1220 | return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing); | ||
| 1221 | } | ||
| 1222 | |||
| 1223 | static_assert(ValidateLayerSize(P::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) == 0x50d800); | ||
| 1224 | static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000); | ||
| 1225 | static_assert(ValidateLayerSize(P::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000); | ||
| 1226 | |||
| 1227 | static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000, | ||
| 1228 | "Tile width spacing is not working"); | ||
| 1229 | static_assert(ValidateLayerSize(P::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000, | ||
| 1230 | "Compressed tile width spacing is not working"); | ||
| 1231 | |||
| 1232 | } // namespace VideoCommon | ||
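
The static asserts above double as worked examples. A minimal sketch of the arithmetic behind the A8B8G8R8_UNORM offsets, assuming the same 1024x1024, 4 bytes-per-pixel image (MipOffset is an illustrative helper, not part of the patch):

    // Each mip level is a quarter of the previous one, so the offsets climb
    // towards the geometric limit 0x400000 * 4 / 3 = 0x555555.
    constexpr u32 MipOffset(u32 level) {
        u32 offset = 0;
        u32 extent = 1024;
        for (u32 l = 0; l < level; ++l) {
            offset += extent * extent * 4; // width * height * bytes per pixel
            extent /= 2;
        }
        return offset;
    }
    static_assert(MipOffset(1) == 0x400000);
    static_assert(MipOffset(4) == 0x550000);
    static_assert(MipOffset(7) == 0x555400);

From the 8x8 level down a mip no longer fills a GOB, so its size is rounded up to a whole 512-byte GOB; that is why the asserted offsets for levels 8 and 9 are 0x555600 and 0x555800 rather than the plain quarter-series values.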
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h new file mode 100644 index 000000000..dbbbd33cd --- /dev/null +++ b/src/video_core/texture_cache/util.h | |||
| @@ -0,0 +1,107 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <span> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/surface.h" | ||
| 14 | #include "video_core/texture_cache/image_base.h" | ||
| 15 | #include "video_core/texture_cache/image_view_base.h" | ||
| 16 | #include "video_core/texture_cache/types.h" | ||
| 17 | #include "video_core/textures/texture.h" | ||
| 18 | |||
| 19 | namespace VideoCommon { | ||
| 20 | |||
| 21 | using Tegra::Texture::TICEntry; | ||
| 22 | |||
| 23 | struct OverlapResult { | ||
| 24 | GPUVAddr gpu_addr; | ||
| 25 | VAddr cpu_addr; | ||
| 26 | SubresourceExtent resources; | ||
| 27 | }; | ||
| 28 | |||
| 29 | [[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept; | ||
| 30 | |||
| 31 | [[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept; | ||
| 32 | |||
| 33 | [[nodiscard]] u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept; | ||
| 34 | |||
| 35 | [[nodiscard]] u32 CalculateLayerStride(const ImageInfo& info) noexcept; | ||
| 36 | |||
| 37 | [[nodiscard]] u32 CalculateLayerSize(const ImageInfo& info) noexcept; | ||
| 38 | |||
| 39 | [[nodiscard]] std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets( | ||
| 40 | const ImageInfo& info) noexcept; | ||
| 41 | |||
| 42 | [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); | ||
| 43 | |||
| 44 | [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); | ||
| 45 | |||
| 46 | [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); | ||
| 47 | |||
| 48 | [[nodiscard]] VideoCore::Surface::PixelFormat PixelFormatFromTIC( | ||
| 49 | const Tegra::Texture::TICEntry& config) noexcept; | ||
| 50 | |||
| 51 | [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; | ||
| 52 | |||
| 53 | [[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, | ||
| 54 | const ImageInfo& src, | ||
| 55 | SubresourceBase base); | ||
| 56 | |||
| 57 | [[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); | ||
| 58 | |||
| 59 | [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, | ||
| 60 | GPUVAddr gpu_addr, const ImageInfo& info, | ||
| 61 | std::span<u8> output); | ||
| 62 | |||
| 63 | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||
| 64 | const ImageBase& image, std::span<u8> output); | ||
| 65 | |||
| 66 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | ||
| 67 | std::span<BufferImageCopy> copies); | ||
| 68 | |||
| 69 | [[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); | ||
| 70 | |||
| 71 | [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); | ||
| 72 | |||
| 73 | [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); | ||
| 74 | |||
| 75 | [[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); | ||
| 76 | |||
| 77 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, | ||
| 78 | std::span<const BufferImageCopy> copies, std::span<const u8> memory); | ||
| 79 | |||
| 80 | [[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info, | ||
| 81 | const ImageInfo& overlap_info, u32 new_level, | ||
| 82 | u32 overlap_level, bool strict_size) noexcept; | ||
| 83 | |||
| 84 | [[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, | ||
| 85 | bool strict_size) noexcept; | ||
| 86 | |||
| 87 | [[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, | ||
| 88 | GPUVAddr gpu_addr, VAddr cpu_addr, | ||
| 89 | const ImageBase& overlap, | ||
| 90 | bool strict_size); | ||
| 91 | |||
| 92 | [[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs); | ||
| 93 | |||
| 94 | [[nodiscard]] std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, | ||
| 95 | const ImageBase& image, | ||
| 96 | GPUVAddr candidate_addr, | ||
| 97 | RelaxedOptions options); | ||
| 98 | |||
| 99 | [[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, | ||
| 100 | GPUVAddr candidate_addr, RelaxedOptions options); | ||
| 101 | |||
| 102 | void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, | ||
| 103 | const ImageBase* src); | ||
| 104 | |||
| 105 | [[nodiscard]] u32 MapSizeBytes(const ImageBase& image); | ||
| 106 | |||
| 107 | } // namespace VideoCommon | ||
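
The declarations above compose into a simple host path: size a staging buffer, fill it with UnswizzleImage, then hand the returned copies to a backend. A hedged sketch, assuming a hypothetical caller (everything except the VideoCommon functions is illustrative):

    void UnswizzleExample(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
                          const VideoCommon::ImageInfo& info) {
        std::vector<u8> host_data(VideoCommon::CalculateUnswizzledSizeBytes(info));
        const std::vector<VideoCommon::BufferImageCopy> copies =
            VideoCommon::UnswizzleImage(gpu_memory, gpu_addr, info, host_data);
        // Each entry of `copies` describes one mip level inside `host_data`;
        // a backend would now record the corresponding buffer-to-image copies.
    }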
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 365bde2f1..acd5bdd78 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <algorithm> | 18 | #include <algorithm> |
| 19 | #include <cassert> | 19 | #include <cassert> |
| 20 | #include <cstring> | 20 | #include <cstring> |
| 21 | #include <span> | ||
| 21 | #include <vector> | 22 | #include <vector> |
| 22 | 23 | ||
| 23 | #include <boost/container/static_vector.hpp> | 24 | #include <boost/container/static_vector.hpp> |
| @@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 600 | return params; | 601 | return params; |
| 601 | } | 602 | } |
| 602 | 603 | ||
| 603 | static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, | 604 | static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth, |
| 604 | u32 blockHeight) { | 605 | u32 blockHeight) { |
| 605 | // Don't actually care about the void extent, just read the bits... | 606 | // Don't actually care about the void extent, just read the bits... |
| 606 | for (s32 i = 0; i < 4; ++i) { | 607 | for (s32 i = 0; i < 4; ++i) { |
| @@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block | |||
| 623 | } | 624 | } |
| 624 | } | 625 | } |
| 625 | 626 | ||
| 626 | static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { | 627 | static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { |
| 627 | for (u32 j = 0; j < blockHeight; j++) { | 628 | for (u32 j = 0; j < blockHeight; j++) { |
| 628 | for (u32 i = 0; i < blockWidth; i++) { | 629 | for (u32 i = 0; i < blockWidth; i++) { |
| 629 | outBuf[j * blockWidth + i] = 0xFFFF00FF; | 630 | outBuf[j * blockWidth + i] = 0xFFFF00FF; |
| @@ -1438,9 +1439,9 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues, | |||
| 1438 | #undef READ_INT_VALUES | 1439 | #undef READ_INT_VALUES |
| 1439 | } | 1440 | } |
| 1440 | 1441 | ||
| 1441 | static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, | 1442 | static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, |
| 1442 | u32* outBuf) { | 1443 | const u32 blockHeight, std::span<u32, 12 * 12> outBuf) { |
| 1443 | InputBitStream strm(inBuf); | 1444 | InputBitStream strm(inBuf.data()); |
| 1444 | TexelWeightParams weightParams = DecodeBlockInfo(strm); | 1445 | TexelWeightParams weightParams = DecodeBlockInfo(strm); |
| 1445 | 1446 | ||
| 1446 | // Was there an error? | 1447 | // Was there an error? |
| @@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | |||
| 1601 | } | 1602 | } |
| 1602 | 1603 | ||
| 1603 | // Read the texel weight data.. | 1604 | // Read the texel weight data.. |
| 1604 | u8 texelWeightData[16]; | 1605 | std::array<u8, 16> texelWeightData; |
| 1605 | memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); | 1606 | std::ranges::copy(inBuf, texelWeightData.begin()); |
| 1606 | 1607 | ||
| 1607 | // Reverse everything | 1608 | // Reverse everything |
| 1608 | for (u32 i = 0; i < 8; i++) { | 1609 | for (u32 i = 0; i < 8; i++) { |
| @@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | |||
| 1618 | 1619 | ||
| 1619 | // Make sure that higher non-texel bits are set to zero | 1620 | // Make sure that higher non-texel bits are set to zero |
| 1620 | const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; | 1621 | const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; |
| 1621 | texelWeightData[clearByteStart - 1] = | 1622 | if (clearByteStart > 0) { |
| 1622 | texelWeightData[clearByteStart - 1] & | 1623 | texelWeightData[clearByteStart - 1] &= |
| 1623 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); | 1624 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); |
| 1624 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); | 1625 | } |
| 1626 | std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U)); | ||
| 1625 | 1627 | ||
| 1626 | IntegerEncodedVector texelWeightValues; | 1628 | IntegerEncodedVector texelWeightValues; |
| 1627 | 1629 | ||
| 1628 | InputBitStream weightStream(texelWeightData); | 1630 | InputBitStream weightStream(texelWeightData.data()); |
| 1629 | 1631 | ||
| 1630 | DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, | 1632 | DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, |
| 1631 | weightParams.GetNumWeightValues()); | 1633 | weightParams.GetNumWeightValues()); |
| @@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | |||
| 1672 | 1674 | ||
| 1673 | namespace Tegra::Texture::ASTC { | 1675 | namespace Tegra::Texture::ASTC { |
| 1674 | 1676 | ||
| 1675 | std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, | 1677 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, |
| 1676 | u32 block_height) { | 1678 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) { |
| 1677 | u32 blockIdx = 0; | 1679 | u32 block_index = 0; |
| 1678 | std::size_t depth_offset = 0; | 1680 | std::size_t depth_offset = 0; |
| 1679 | std::vector<u8> outData(height * width * depth * 4); | 1681 | for (u32 z = 0; z < depth; z++) { |
| 1680 | for (u32 k = 0; k < depth; k++) { | 1682 | for (u32 y = 0; y < height; y += block_height) { |
| 1681 | for (u32 j = 0; j < height; j += block_height) { | 1683 | for (u32 x = 0; x < width; x += block_width) { |
| 1682 | for (u32 i = 0; i < width; i += block_width) { | 1684 | const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; |
| 1683 | |||
| 1684 | const u8* blockPtr = data + blockIdx * 16; | ||
| 1685 | 1685 | ||
| 1686 | // Blocks can be at most 12x12 | 1686 | // Blocks can be at most 12x12 |
| 1687 | u32 uncompData[144]; | 1687 | std::array<u32, 12 * 12> uncompData; |
| 1688 | ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); | 1688 | ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); |
| 1689 | 1689 | ||
| 1690 | u32 decompWidth = std::min(block_width, width - i); | 1690 | u32 decompWidth = std::min(block_width, width - x); |
| 1691 | u32 decompHeight = std::min(block_height, height - j); | 1691 | u32 decompHeight = std::min(block_height, height - y); |
| 1692 | 1692 | ||
| 1693 | u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; | 1693 | const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); |
| 1694 | for (u32 jj = 0; jj < decompHeight; jj++) { | 1694 | for (u32 jj = 0; jj < decompHeight; jj++) { |
| 1695 | memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); | 1695 | std::memcpy(outRow.data() + jj * width * 4, |
| 1696 | uncompData.data() + jj * block_width, decompWidth * 4); | ||
| 1696 | } | 1697 | } |
| 1697 | 1698 | ++block_index; | |
| 1698 | blockIdx++; | ||
| 1699 | } | 1699 | } |
| 1700 | } | 1700 | } |
| 1701 | depth_offset += height * width * 4; | 1701 | depth_offset += height * width * 4; |
| 1702 | } | 1702 | } |
| 1703 | |||
| 1704 | return outData; | ||
| 1705 | } | 1703 | } |
| 1706 | 1704 | ||
| 1707 | } // namespace Tegra::Texture::ASTC | 1705 | } // namespace Tegra::Texture::ASTC |
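
Decompress no longer allocates: callers now pass an RGBA8 output buffer of width * height * depth * 4 bytes. A sketch of the old allocating behaviour on top of the new interface (the wrapper is illustrative, only ASTC::Decompress is part of the patch):

    std::vector<u8> DecompressToVector(std::span<const u8> data, u32 width, u32 height,
                                       u32 depth, u32 block_width, u32 block_height) {
        // One RGBA8 texel (4 bytes) per decoded pixel
        std::vector<u8> output(static_cast<std::size_t>(width) * height * depth * 4);
        Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height,
                                         output);
        return output;
    }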
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 991cdba72..9105119bc 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h | |||
| @@ -5,11 +5,11 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstdint> | 7 | #include <cstdint> |
| 8 | #include <vector> | 8 | #include <span> |
| 9 | 9 | ||
| 10 | namespace Tegra::Texture::ASTC { | 10 | namespace Tegra::Texture::ASTC { |
| 11 | 11 | ||
| 12 | std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, | 12 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, |
| 13 | uint32_t depth, uint32_t block_width, uint32_t block_height); | 13 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); |
| 14 | 14 | ||
| 15 | } // namespace Tegra::Texture::ASTC | 15 | } // namespace Tegra::Texture::ASTC |
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp deleted file mode 100644 index bd1aebf02..000000000 --- a/src/video_core/textures/convert.cpp +++ /dev/null | |||
| @@ -1,93 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstring> | ||
| 7 | #include <tuple> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/surface.h" | ||
| 14 | #include "video_core/textures/astc.h" | ||
| 15 | #include "video_core/textures/convert.h" | ||
| 16 | |||
| 17 | namespace Tegra::Texture { | ||
| 18 | |||
| 19 | using VideoCore::Surface::PixelFormat; | ||
| 20 | |||
| 21 | template <bool reverse> | ||
| 22 | void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) { | ||
| 23 | union S8Z24 { | ||
| 24 | BitField<0, 24, u32> z24; | ||
| 25 | BitField<24, 8, u32> s8; | ||
| 26 | }; | ||
| 27 | static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); | ||
| 28 | |||
| 29 | union Z24S8 { | ||
| 30 | BitField<0, 8, u32> s8; | ||
| 31 | BitField<8, 24, u32> z24; | ||
| 32 | }; | ||
| 33 | static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); | ||
| 34 | |||
| 35 | S8Z24 s8z24_pixel{}; | ||
| 36 | Z24S8 z24s8_pixel{}; | ||
| 37 | constexpr auto bpp{ | ||
| 38 | VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)}; | ||
| 39 | for (std::size_t y = 0; y < height; ++y) { | ||
| 40 | for (std::size_t x = 0; x < width; ++x) { | ||
| 41 | const std::size_t offset{bpp * (y * width + x)}; | ||
| 42 | if constexpr (reverse) { | ||
| 43 | std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); | ||
| 44 | s8z24_pixel.s8.Assign(z24s8_pixel.s8); | ||
| 45 | s8z24_pixel.z24.Assign(z24s8_pixel.z24); | ||
| 46 | std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); | ||
| 47 | } else { | ||
| 48 | std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); | ||
| 49 | z24s8_pixel.s8.Assign(s8z24_pixel.s8); | ||
| 50 | z24s8_pixel.z24.Assign(s8z24_pixel.z24); | ||
| 51 | std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | } | ||
| 55 | } | ||
| 56 | |||
| 57 | static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) { | ||
| 58 | SwapS8Z24ToZ24S8<false>(data, width, height); | ||
| 59 | } | ||
| 60 | |||
| 61 | static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { | ||
| 62 | SwapS8Z24ToZ24S8<true>(data, width, height); | ||
| 63 | } | ||
| 64 | |||
| 65 | void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width, | ||
| 66 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24) { | ||
| 67 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { | ||
| 68 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. | ||
| 69 | u32 block_width{}; | ||
| 70 | u32 block_height{}; | ||
| 71 | std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); | ||
| 72 | const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress( | ||
| 73 | in_data, width, height, depth, block_width, block_height); | ||
| 74 | std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); | ||
| 75 | |||
| 76 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { | ||
| 77 | Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, | ||
| 82 | bool convert_astc, bool convert_s8z24) { | ||
| 83 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { | ||
| 84 | LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", | ||
| 85 | pixel_format); | ||
| 86 | UNREACHABLE(); | ||
| 87 | |||
| 88 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { | ||
| 89 | Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height); | ||
| 90 | } | ||
| 91 | } | ||
| 92 | |||
| 93 | } // namespace Tegra::Texture | ||
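
The BitField unions in the deleted helper pin down the swap precisely: S8Z24 keeps stencil in the top byte, Z24S8 keeps it in the bottom byte. A worked example of the conversion, written as plain shifts under those layouts:

    constexpr u32 s8z24 = 0xAB123456; // s8 = 0xAB, z24 = 0x123456
    constexpr u32 z24s8 = (s8z24 << 8) | (s8z24 >> 24);
    static_assert(z24s8 == 0x123456AB); // z24 in bits 8..31, s8 in bits 0..7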
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h deleted file mode 100644 index d5d6c77bb..000000000 --- a/src/video_core/textures/convert.h +++ /dev/null | |||
| @@ -1,22 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace VideoCore::Surface { | ||
| 10 | enum class PixelFormat; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace Tegra::Texture { | ||
| 14 | |||
| 15 | void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format, | ||
| 16 | u32 width, u32 height, u32 depth, bool convert_astc, | ||
| 17 | bool convert_s8z24); | ||
| 18 | |||
| 19 | void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, | ||
| 20 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); | ||
| 21 | |||
| 22 | } // namespace Tegra::Texture | ||
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 16d46a018..9f5181318 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -2,204 +2,111 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | ||
| 5 | #include <cmath> | 6 | #include <cmath> |
| 6 | #include <cstring> | 7 | #include <cstring> |
| 8 | #include <span> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 7 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| 8 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 9 | #include "common/bit_util.h" | 13 | #include "common/bit_util.h" |
| 14 | #include "common/div_ceil.h" | ||
| 10 | #include "video_core/gpu.h" | 15 | #include "video_core/gpu.h" |
| 11 | #include "video_core/textures/decoders.h" | 16 | #include "video_core/textures/decoders.h" |
| 12 | #include "video_core/textures/texture.h" | 17 | #include "video_core/textures/texture.h" |
| 13 | 18 | ||
| 14 | namespace Tegra::Texture { | 19 | namespace Tegra::Texture { |
| 15 | namespace { | ||
| 16 | 20 | ||
| 21 | namespace { | ||
| 17 | /** | 22 | /** |
| 18 | * This table represents the internal swizzle of a gob, | 23 | * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing. |
| 19 | * in format 16 bytes x 2 sector packing. | ||
| 20 | * Calculates the offset of an (x, y) position within a swizzled texture. | 24 | * Calculates the offset of an (x, y) position within a swizzled texture. |
| 21 | * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188 | 25 | * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188 |
| 22 | */ | 26 | */ |
| 23 | template <std::size_t N, std::size_t M, u32 Align> | 27 | constexpr SwizzleTable MakeSwizzleTableConst() { |
| 24 | struct alignas(64) SwizzleTable { | 28 | SwizzleTable table{}; |
| 25 | static_assert(M * Align == 64, "Swizzle Table does not align to GOB"); | 29 | for (u32 y = 0; y < table.size(); ++y) { |
| 26 | constexpr SwizzleTable() { | 30 | for (u32 x = 0; x < table[0].size(); ++x) { |
| 27 | for (u32 y = 0; y < N; ++y) { | 31 | table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + |
| 28 | for (u32 x = 0; x < M; ++x) { | 32 | (y % 2) * 16 + (x % 16); |
| 29 | const u32 x2 = x * Align; | ||
| 30 | values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 + | ||
| 31 | ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16)); | ||
| 32 | } | ||
| 33 | } | 33 | } |
| 34 | } | 34 | } |
| 35 | const std::array<u16, M>& operator[](std::size_t index) const { | 35 | return table; |
| 36 | return values[index]; | 36 | } |
| 37 | } | ||
| 38 | std::array<std::array<u16, M>, N> values{}; | ||
| 39 | }; | ||
| 40 | 37 | ||
| 41 | constexpr u32 FAST_SWIZZLE_ALIGN = 16; | 38 | constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst(); |
| 42 | 39 | ||
| 43 | constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>(); | 40 | template <bool TO_LINEAR> |
| 44 | constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>(); | 41 | void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 42 | u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { | ||
| 43 | // The origin of the transformation can be configured here, leave it as zero as the current API | ||
| 44 | // doesn't expose it. | ||
| 45 | static constexpr u32 origin_x = 0; | ||
| 46 | static constexpr u32 origin_y = 0; | ||
| 47 | static constexpr u32 origin_z = 0; | ||
| 45 | 48 | ||
| 46 | /** | 49 | // We can configure here a custom pitch |
| 47 | * This function manages ALL the GOBs(Group of Bytes) Inside a single block. | 50 | // As it's not exposed 'width * bpp' will be the expected pitch. |
| 48 | * Instead of going gob by gob, we map the coordinates inside a block and manage from | 51 | const u32 pitch = width * bytes_per_pixel; |
| 49 | * those. Block_Width is assumed to be 1. | 52 | const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel; |
| 50 | */ | ||
| 51 | void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, | ||
| 52 | const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, | ||
| 53 | const u32 y_end, const u32 z_end, const u32 tile_offset, | ||
| 54 | const u32 xy_block_size, const u32 layer_z, const u32 stride_x, | ||
| 55 | const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { | ||
| 56 | std::array<u8*, 2> data_ptrs; | ||
| 57 | u32 z_address = tile_offset; | ||
| 58 | |||
| 59 | for (u32 z = z_start; z < z_end; z++) { | ||
| 60 | u32 y_address = z_address; | ||
| 61 | u32 pixel_base = layer_z * z + y_start * stride_x; | ||
| 62 | for (u32 y = y_start; y < y_end; y++) { | ||
| 63 | const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; | ||
| 64 | for (u32 x = x_start; x < x_end; x++) { | ||
| 65 | const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]}; | ||
| 66 | const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; | ||
| 67 | data_ptrs[unswizzle] = swizzled_data + swizzle_offset; | ||
| 68 | data_ptrs[!unswizzle] = unswizzled_data + pixel_index; | ||
| 69 | std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); | ||
| 70 | } | ||
| 71 | pixel_base += stride_x; | ||
| 72 | if ((y + 1) % GOB_SIZE_Y == 0) | ||
| 73 | y_address += GOB_SIZE; | ||
| 74 | } | ||
| 75 | z_address += xy_block_size; | ||
| 76 | } | ||
| 77 | } | ||
| 78 | 53 | ||
| 79 | /** | 54 | const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); |
| 80 | * This function manages ALL the GOBs(Group of Bytes) Inside a single block. | 55 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); |
| 81 | * Instead of going gob by gob, we map the coordinates inside a block and manage from | 56 | const u32 slice_size = |
| 82 | * those. Block_Width is assumed to be 1. | 57 | Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size; |
| 83 | */ | ||
| 84 | void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, | ||
| 85 | const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, | ||
| 86 | const u32 y_end, const u32 z_end, const u32 tile_offset, | ||
| 87 | const u32 xy_block_size, const u32 layer_z, const u32 stride_x, | ||
| 88 | const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { | ||
| 89 | std::array<u8*, 2> data_ptrs; | ||
| 90 | u32 z_address = tile_offset; | ||
| 91 | const u32 x_startb = x_start * bytes_per_pixel; | ||
| 92 | const u32 x_endb = x_end * bytes_per_pixel; | ||
| 93 | |||
| 94 | for (u32 z = z_start; z < z_end; z++) { | ||
| 95 | u32 y_address = z_address; | ||
| 96 | u32 pixel_base = layer_z * z + y_start * stride_x; | ||
| 97 | for (u32 y = y_start; y < y_end; y++) { | ||
| 98 | const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y]; | ||
| 99 | for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) { | ||
| 100 | const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]}; | ||
| 101 | const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; | ||
| 102 | const u32 pixel_index{out_x + pixel_base}; | ||
| 103 | data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; | ||
| 104 | data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; | ||
| 105 | std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN); | ||
| 106 | } | ||
| 107 | pixel_base += stride_x; | ||
| 108 | if ((y + 1) % GOB_SIZE_Y == 0) | ||
| 109 | y_address += GOB_SIZE; | ||
| 110 | } | ||
| 111 | z_address += xy_block_size; | ||
| 112 | } | ||
| 113 | } | ||
| 114 | 58 | ||
| 115 | /** | 59 | const u32 block_height_mask = (1U << block_height) - 1; |
| 116 | * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue. | 60 | const u32 block_depth_mask = (1U << block_depth) - 1; |
| 117 | * The body of this function takes care of splitting the swizzled texture into blocks, | 61 | const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth; |
| 118 | * and managing the extents of it. Once all the parameters of a single block are obtained, | 62 | |
| 119 | * the function calls 'ProcessBlock' to process that particular Block. | 63 | for (u32 slice = 0; slice < depth; ++slice) { |
| 120 | * | 64 | const u32 z = slice + origin_z; |
| 121 | * Documentation for the memory layout and decoding can be found at: | 65 | const u32 offset_z = (z >> block_depth) * slice_size + |
| 122 | * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces | 66 | ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height)); |
| 123 | */ | 67 | for (u32 line = 0; line < height; ++line) { |
| 124 | template <bool fast> | 68 | const u32 y = line + origin_y; |
| 125 | void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, | 69 | const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; |
| 126 | const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, | 70 | |
| 127 | const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth, | 71 | const u32 block_y = y >> GOB_SIZE_Y_SHIFT; |
| 128 | const u32 width_spacing) { | 72 | const u32 offset_y = (block_y >> block_height) * block_size + |
| 129 | auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | 73 | ((block_y & block_height_mask) << GOB_SIZE_SHIFT); |
| 130 | const u32 stride_x = width * out_bytes_per_pixel; | 74 | |
| 131 | const u32 layer_z = height * stride_x; | 75 | for (u32 column = 0; column < width; ++column) { |
| 132 | const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; | 76 | const u32 x = (column + origin_x) * bytes_per_pixel; |
| 133 | constexpr u32 gob_elements_y = GOB_SIZE_Y; | 77 | const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift; |
| 134 | constexpr u32 gob_elements_z = GOB_SIZE_Z; | 78 | |
| 135 | const u32 block_x_elements = gob_elements_x; | 79 | const u32 base_swizzled_offset = offset_z + offset_y + offset_x; |
| 136 | const u32 block_y_elements = gob_elements_y * block_height; | 80 | const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X]; |
| 137 | const u32 block_z_elements = gob_elements_z * block_depth; | 81 | |
| 138 | const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing); | 82 | const u32 unswizzled_offset = |
| 139 | const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); | 83 | slice * pitch * height + line * pitch + column * bytes_per_pixel; |
| 140 | const u32 blocks_on_y = div_ceil(height, block_y_elements); | 84 | |
| 141 | const u32 blocks_on_z = div_ceil(depth, block_z_elements); | 85 | u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; |
| 142 | const u32 xy_block_size = GOB_SIZE * block_height; | 86 | const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset]; |
| 143 | const u32 block_size = xy_block_size * block_depth; | 87 | std::memcpy(dst, src, bytes_per_pixel); |
| 144 | u32 tile_offset = 0; | ||
| 145 | for (u32 zb = 0; zb < blocks_on_z; zb++) { | ||
| 146 | const u32 z_start = zb * block_z_elements; | ||
| 147 | const u32 z_end = std::min(depth, z_start + block_z_elements); | ||
| 148 | for (u32 yb = 0; yb < blocks_on_y; yb++) { | ||
| 149 | const u32 y_start = yb * block_y_elements; | ||
| 150 | const u32 y_end = std::min(height, y_start + block_y_elements); | ||
| 151 | for (u32 xb = 0; xb < blocks_on_x; xb++) { | ||
| 152 | const u32 x_start = xb * block_x_elements; | ||
| 153 | const u32 x_end = std::min(width, x_start + block_x_elements); | ||
| 154 | if constexpr (fast) { | ||
| 155 | FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, | ||
| 156 | z_start, x_end, y_end, z_end, tile_offset, xy_block_size, | ||
| 157 | layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); | ||
| 158 | } else { | ||
| 159 | PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, | ||
| 160 | z_start, x_end, y_end, z_end, tile_offset, xy_block_size, | ||
| 161 | layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); | ||
| 162 | } | ||
| 163 | tile_offset += block_size; | ||
| 164 | } | 88 | } |
| 165 | } | 89 | } |
| 166 | } | 90 | } |
| 167 | } | 91 | } |
| 168 | |||
| 169 | } // Anonymous namespace | 92 | } // Anonymous namespace |
| 170 | 93 | ||
| 171 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | 94 | SwizzleTable MakeSwizzleTable() { |
| 172 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, | 95 | return SWIZZLE_TABLE; |
| 173 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { | ||
| 174 | const u32 block_height_size{1U << block_height}; | ||
| 175 | const u32 block_depth_size{1U << block_depth}; | ||
| 176 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) { | ||
| 177 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | ||
| 178 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, | ||
| 179 | block_depth_size, width_spacing); | ||
| 180 | } else { | ||
| 181 | SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | ||
| 182 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, | ||
| 183 | block_depth_size, width_spacing); | ||
| 184 | } | ||
| 185 | } | 96 | } |
| 186 | 97 | ||
| 187 | void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, | 98 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |
| 188 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, | 99 | u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, |
| 189 | u32 block_depth, u32 width_spacing) { | 100 | u32 stride_alignment) { |
| 190 | CopySwizzledData((width + tile_size_x - 1) / tile_size_x, | 101 | Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, |
| 191 | (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, | 102 | stride_alignment); |
| 192 | bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth, | ||
| 193 | width_spacing); | ||
| 194 | } | 103 | } |
| 195 | 104 | ||
| 196 | std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, | 105 | void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 197 | u32 width, u32 height, u32 depth, u32 block_height, | 106 | u32 height, u32 depth, u32 block_height, u32 block_depth, |
| 198 | u32 block_depth, u32 width_spacing) { | 107 | u32 stride_alignment) { |
| 199 | std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); | 108 | Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, |
| 200 | UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, | 109 | stride_alignment); |
| 201 | width, height, depth, block_height, block_depth, width_spacing); | ||
| 202 | return unswizzled_data; | ||
| 203 | } | 110 | } |
| 204 | 111 | ||
| 205 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 112 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| @@ -213,7 +120,7 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 | |||
| 213 | const u32 gob_address_y = | 120 | const u32 gob_address_y = |
| 214 | (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + | 121 | (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + |
| 215 | ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; | 122 | ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; |
| 216 | const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; | 123 | const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; |
| 217 | for (u32 x = 0; x < subrect_width; ++x) { | 124 | for (u32 x = 0; x < subrect_width; ++x) { |
| 218 | const u32 dst_x = x + offset_x; | 125 | const u32 dst_x = x + offset_x; |
| 219 | const u32 gob_address = | 126 | const u32 gob_address = |
| @@ -235,11 +142,11 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, | |||
| 235 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); | 142 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); |
| 236 | 143 | ||
| 237 | const u32 block_height_mask = (1U << block_height) - 1; | 144 | const u32 block_height_mask = (1U << block_height) - 1; |
| 238 | const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height; | 145 | const u32 x_shift = GOB_SIZE_SHIFT + block_height; |
| 239 | 146 | ||
| 240 | for (u32 line = 0; line < line_count; ++line) { | 147 | for (u32 line = 0; line < line_count; ++line) { |
| 241 | const u32 src_y = line + origin_y; | 148 | const u32 src_y = line + origin_y; |
| 242 | const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; | 149 | const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; |
| 243 | 150 | ||
| 244 | const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; | 151 | const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; |
| 245 | const u32 src_offset_y = (block_y >> block_height) * block_size + | 152 | const u32 src_offset_y = (block_y >> block_height) * block_size + |
| @@ -270,7 +177,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt | |||
| 270 | const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; | 177 | const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; |
| 271 | 178 | ||
| 272 | for (u32 line = 0; line < line_count; ++line) { | 179 | for (u32 line = 0; line < line_count; ++line) { |
| 273 | const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; | 180 | const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y]; |
| 274 | const u32 block_y = line / GOB_SIZE_Y; | 181 | const u32 block_y = line / GOB_SIZE_Y; |
| 275 | const u32 dst_offset_y = | 182 | const u32 dst_offset_y = |
| 276 | (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; | 183 | (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; |
| @@ -293,7 +200,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 | |||
| 293 | const std::size_t gob_address_y = | 200 | const std::size_t gob_address_y = |
| 294 | (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + | 201 | (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + |
| 295 | ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; | 202 | ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; |
| 296 | const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; | 203 | const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; |
| 297 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { | 204 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { |
| 298 | const std::size_t gob_address = | 205 | const std::size_t gob_address = |
| 299 | gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; | 206 | gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; |
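Editor's note: per the calls above, the rewritten decoders.cpp replaces the old runtime `unswizzle` bool of CopySwizzledData with a template parameter, so UnswizzleTexture dispatches to Swizzle<false> and SwizzleTexture to Swizzle<true>. A minimal sketch of that compile-time dispatch pattern follows; the parameter name TO_SWIZZLED and the helper CopyPixel are illustrative assumptions, not the actual implementation, which additionally walks GOBs and blocks.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <span>

// Direction is a template parameter, so the branch below is resolved at
// compile time instead of being tested per pixel through a runtime bool.
template <bool TO_SWIZZLED>
void CopyPixel(std::span<std::uint8_t> output, std::span<const std::uint8_t> input,
               std::size_t linear_offset, std::size_t tiled_offset,
               std::size_t bytes_per_pixel) {
    if constexpr (TO_SWIZZLED) {
        // Swizzle: copy from the linear input to the tiled output.
        std::copy_n(input.begin() + linear_offset, bytes_per_pixel,
                    output.begin() + tiled_offset);
    } else {
        // Unswizzle: copy from the tiled input to the linear output.
        std::copy_n(input.begin() + tiled_offset, bytes_per_pixel,
                    output.begin() + linear_offset);
    }
}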
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 01e156bc8..d7cdc81e8 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -4,7 +4,8 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <vector> | 7 | #include <span> |
| 8 | |||
| 8 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 9 | #include "video_core/textures/texture.h" | 10 | #include "video_core/textures/texture.h" |
| 10 | 11 | ||
| @@ -15,28 +16,25 @@ constexpr u32 GOB_SIZE_Y = 8; | |||
| 15 | constexpr u32 GOB_SIZE_Z = 1; | 16 | constexpr u32 GOB_SIZE_Z = 1; |
| 16 | constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; | 17 | constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; |
| 17 | 18 | ||
| 18 | constexpr std::size_t GOB_SIZE_X_SHIFT = 6; | 19 | constexpr u32 GOB_SIZE_X_SHIFT = 6; |
| 19 | constexpr std::size_t GOB_SIZE_Y_SHIFT = 3; | 20 | constexpr u32 GOB_SIZE_Y_SHIFT = 3; |
| 20 | constexpr std::size_t GOB_SIZE_Z_SHIFT = 0; | 21 | constexpr u32 GOB_SIZE_Z_SHIFT = 0; |
| 21 | constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; | 22 | constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; |
| 22 | 23 | ||
| 23 | /// Unswizzles a swizzled texture without changing its format. | 24 | using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>; |
| 24 | void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, | 25 | |
| 25 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 26 | /// Returns a z-order swizzle table |
| 26 | u32 block_height = TICEntry::DefaultBlockHeight, | 27 | SwizzleTable MakeSwizzleTable(); |
| 27 | u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); | 28 | |
| 28 | 29 | /// Unswizzles a block linear texture into linear memory. | |
| 29 | /// Unswizzles a swizzled texture without changing its format. | 30 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |
| 30 | std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, | 31 | u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, |
| 31 | u32 width, u32 height, u32 depth, | 32 | u32 stride_alignment = 1); |
| 32 | u32 block_height = TICEntry::DefaultBlockHeight, | 33 | |
| 33 | u32 block_depth = TICEntry::DefaultBlockHeight, | 34 | /// Swizzles linear memory into a block linear texture. |
| 34 | u32 width_spacing = 0); | 35 | void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 35 | 36 | u32 height, u32 depth, u32 block_height, u32 block_depth, | |
| 36 | /// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. | 37 | u32 stride_alignment = 1); |
| 37 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | ||
| 38 | u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, | ||
| 39 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); | ||
| 40 | 38 | ||
| 41 | /// This function calculates the correct size of a texture depending if it's tiled or not. | 39 | /// This function calculates the correct size of a texture depending if it's tiled or not. |
| 42 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 40 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
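Editor's note: with the std::vector-returning overload gone, the caller now owns the output allocation. A hypothetical call site for the span-based declarations above; the dimensions and block parameters are made-up values, and block_height/block_depth are the log2 GOB counts that the `(1U << block_height)` masks in decoders.cpp suggest, not pixel counts.

#include <cstdint>
#include <span>
#include <vector>

#include "video_core/textures/decoders.h"

void UnswizzleExample(std::span<const std::uint8_t> tiled_input) {
    // Illustrative parameters only.
    constexpr std::uint32_t width = 256;
    constexpr std::uint32_t height = 256;
    constexpr std::uint32_t depth = 1;
    constexpr std::uint32_t bytes_per_pixel = 4;

    // The caller sizes and allocates the linear destination buffer.
    std::vector<std::uint8_t> linear(std::size_t{width} * height * depth * bytes_per_pixel);
    Tegra::Texture::UnswizzleTexture(linear, tiled_input, bytes_per_pixel, width, height,
                                     depth, /*block_height=*/4,
                                     /*block_depth=*/0); // stride_alignment defaults to 1
}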
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index 4171e3ef2..ae5621a7d 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp | |||
| @@ -5,9 +5,13 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | 7 | ||
| 8 | #include "common/cityhash.h" | ||
| 8 | #include "core/settings.h" | 9 | #include "core/settings.h" |
| 9 | #include "video_core/textures/texture.h" | 10 | #include "video_core/textures/texture.h" |
| 10 | 11 | ||
| 12 | using Tegra::Texture::TICEntry; | ||
| 13 | using Tegra::Texture::TSCEntry; | ||
| 14 | |||
| 11 | namespace Tegra::Texture { | 15 | namespace Tegra::Texture { |
| 12 | 16 | ||
| 13 | namespace { | 17 | namespace { |
| @@ -65,7 +69,7 @@ unsigned SettingsMinimumAnisotropy() noexcept { | |||
| 65 | 69 | ||
| 66 | } // Anonymous namespace | 70 | } // Anonymous namespace |
| 67 | 71 | ||
| 68 | std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { | 72 | std::array<float, 4> TSCEntry::BorderColor() const noexcept { |
| 69 | if (!srgb_conversion) { | 73 | if (!srgb_conversion) { |
| 70 | return border_color; | 74 | return border_color; |
| 71 | } | 75 | } |
| @@ -73,8 +77,16 @@ std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { | |||
| 73 | SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; | 77 | SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; |
| 74 | } | 78 | } |
| 75 | 79 | ||
| 76 | float TSCEntry::GetMaxAnisotropy() const noexcept { | 80 | float TSCEntry::MaxAnisotropy() const noexcept { |
| 77 | return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); | 81 | return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); |
| 78 | } | 82 | } |
| 79 | 83 | ||
| 80 | } // namespace Tegra::Texture | 84 | } // namespace Tegra::Texture |
| 85 | |||
| 86 | size_t std::hash<TICEntry>::operator()(const TICEntry& tic) const noexcept { | ||
| 87 | return Common::CityHash64(reinterpret_cast<const char*>(&tic), sizeof tic); | ||
| 88 | } | ||
| 89 | |||
| 90 | size_t std::hash<TSCEntry>::operator()(const TSCEntry& tsc) const noexcept { | ||
| 91 | return Common::CityHash64(reinterpret_cast<const char*>(&tsc), sizeof tsc); | ||
| 92 | } | ||
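Editor's note: these std::hash specializations hash the descriptors' raw bytes with CityHash64, and the raw-array operator== added in texture.h supplies the matching equality, so TIC/TSC entries can key standard hash containers directly. A hypothetical cache keyed this way (the map and function are illustrative, not yuzu's actual texture cache):

#include <cstddef>
#include <unordered_map>

#include "video_core/textures/texture.h"

// Assigns each distinct sampler descriptor a small index the first time it
// is seen; std::hash<TSCEntry> and TSCEntry::operator== do the bucketing.
std::size_t SamplerIndex(std::unordered_map<Tegra::Texture::TSCEntry, std::size_t>& cache,
                         const Tegra::Texture::TSCEntry& tsc) {
    const auto [it, inserted] = cache.try_emplace(tsc, cache.size());
    return it->second;
}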
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index bbc7e3eaf..c1d14335e 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -53,27 +53,27 @@ enum class TextureFormat : u32 { | |||
| 53 | BC4 = 0x27, | 53 | BC4 = 0x27, |
| 54 | BC5 = 0x28, | 54 | BC5 = 0x28, |
| 55 | S8D24 = 0x29, | 55 | S8D24 = 0x29, |
| 56 | X8Z24 = 0x2a, | 56 | X8D24 = 0x2a, |
| 57 | D24S8 = 0x2b, | 57 | D24S8 = 0x2b, |
| 58 | X4V4Z24__COV4R4V = 0x2c, | 58 | X4V4D24__COV4R4V = 0x2c, |
| 59 | X4V4Z24__COV8R8V = 0x2d, | 59 | X4V4D24__COV8R8V = 0x2d, |
| 60 | V8Z24__COV4R12V = 0x2e, | 60 | V8D24__COV4R12V = 0x2e, |
| 61 | D32 = 0x2f, | 61 | D32 = 0x2f, |
| 62 | D32S8 = 0x30, | 62 | D32S8 = 0x30, |
| 63 | X8Z24_X20V4S8__COV4R4V = 0x31, | 63 | X8D24_X20V4S8__COV4R4V = 0x31, |
| 64 | X8Z24_X20V4S8__COV8R8V = 0x32, | 64 | X8D24_X20V4S8__COV8R8V = 0x32, |
| 65 | ZF32_X20V4X8__COV4R4V = 0x33, | 65 | D32_X20V4X8__COV4R4V = 0x33, |
| 66 | ZF32_X20V4X8__COV8R8V = 0x34, | 66 | D32_X20V4X8__COV8R8V = 0x34, |
| 67 | ZF32_X20V4S8__COV4R4V = 0x35, | 67 | D32_X20V4S8__COV4R4V = 0x35, |
| 68 | ZF32_X20V4S8__COV8R8V = 0x36, | 68 | D32_X20V4S8__COV8R8V = 0x36, |
| 69 | X8Z24_X16V8S8__COV4R12V = 0x37, | 69 | X8D24_X16V8S8__COV4R12V = 0x37, |
| 70 | ZF32_X16V8X8__COV4R12V = 0x38, | 70 | D32_X16V8X8__COV4R12V = 0x38, |
| 71 | ZF32_X16V8S8__COV4R12V = 0x39, | 71 | D32_X16V8S8__COV4R12V = 0x39, |
| 72 | D16 = 0x3a, | 72 | D16 = 0x3a, |
| 73 | V8Z24__COV8R24V = 0x3b, | 73 | V8D24__COV8R24V = 0x3b, |
| 74 | X8Z24_X16V8S8__COV8R24V = 0x3c, | 74 | X8D24_X16V8S8__COV8R24V = 0x3c, |
| 75 | ZF32_X16V8X8__COV8R24V = 0x3d, | 75 | D32_X16V8X8__COV8R24V = 0x3d, |
| 76 | ZF32_X16V8S8__COV8R24V = 0x3e, | 76 | D32_X16V8S8__COV8R24V = 0x3e, |
| 77 | ASTC_2D_4X4 = 0x40, | 77 | ASTC_2D_4X4 = 0x40, |
| 78 | ASTC_2D_5X5 = 0x41, | 78 | ASTC_2D_5X5 = 0x41, |
| 79 | ASTC_2D_6X6 = 0x42, | 79 | ASTC_2D_6X6 = 0x42, |
| @@ -146,7 +146,7 @@ enum class MsaaMode : u32 { | |||
| 146 | }; | 146 | }; |
| 147 | 147 | ||
| 148 | union TextureHandle { | 148 | union TextureHandle { |
| 149 | /* implicit */ TextureHandle(u32 raw_) : raw{raw_} {} | 149 | /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {} |
| 150 | 150 | ||
| 151 | u32 raw; | 151 | u32 raw; |
| 152 | BitField<0, 20, u32> tic_id; | 152 | BitField<0, 20, u32> tic_id; |
| @@ -155,124 +155,124 @@ union TextureHandle { | |||
| 155 | static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); | 155 | static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); |
| 156 | 156 | ||
| 157 | struct TICEntry { | 157 | struct TICEntry { |
| 158 | static constexpr u32 DefaultBlockHeight = 16; | ||
| 159 | static constexpr u32 DefaultBlockDepth = 1; | ||
| 160 | |||
| 161 | union { | ||
| 162 | u32 raw; | ||
| 163 | BitField<0, 7, TextureFormat> format; | ||
| 164 | BitField<7, 3, ComponentType> r_type; | ||
| 165 | BitField<10, 3, ComponentType> g_type; | ||
| 166 | BitField<13, 3, ComponentType> b_type; | ||
| 167 | BitField<16, 3, ComponentType> a_type; | ||
| 168 | |||
| 169 | BitField<19, 3, SwizzleSource> x_source; | ||
| 170 | BitField<22, 3, SwizzleSource> y_source; | ||
| 171 | BitField<25, 3, SwizzleSource> z_source; | ||
| 172 | BitField<28, 3, SwizzleSource> w_source; | ||
| 173 | }; | ||
| 174 | u32 address_low; | ||
| 175 | union { | 158 | union { |
| 176 | BitField<0, 16, u32> address_high; | 159 | struct { |
| 177 | BitField<21, 3, TICHeaderVersion> header_version; | 160 | union { |
| 178 | }; | 161 | BitField<0, 7, TextureFormat> format; |
| 179 | union { | 162 | BitField<7, 3, ComponentType> r_type; |
| 180 | BitField<0, 3, u32> block_width; | 163 | BitField<10, 3, ComponentType> g_type; |
| 181 | BitField<3, 3, u32> block_height; | 164 | BitField<13, 3, ComponentType> b_type; |
| 182 | BitField<6, 3, u32> block_depth; | 165 | BitField<16, 3, ComponentType> a_type; |
| 166 | |||
| 167 | BitField<19, 3, SwizzleSource> x_source; | ||
| 168 | BitField<22, 3, SwizzleSource> y_source; | ||
| 169 | BitField<25, 3, SwizzleSource> z_source; | ||
| 170 | BitField<28, 3, SwizzleSource> w_source; | ||
| 171 | }; | ||
| 172 | u32 address_low; | ||
| 173 | union { | ||
| 174 | BitField<0, 16, u32> address_high; | ||
| 175 | BitField<16, 5, u32> layer_base_3_7; | ||
| 176 | BitField<21, 3, TICHeaderVersion> header_version; | ||
| 177 | BitField<24, 1, u32> load_store_hint; | ||
| 178 | BitField<25, 4, u32> view_coherency_hash; | ||
| 179 | BitField<29, 3, u32> layer_base_8_10; | ||
| 180 | }; | ||
| 181 | union { | ||
| 182 | BitField<0, 3, u32> block_width; | ||
| 183 | BitField<3, 3, u32> block_height; | ||
| 184 | BitField<6, 3, u32> block_depth; | ||
| 183 | 185 | ||
| 184 | BitField<10, 3, u32> tile_width_spacing; | 186 | BitField<10, 3, u32> tile_width_spacing; |
| 185 | 187 | ||
| 186 | // High 16 bits of the pitch value | 188 | // High 16 bits of the pitch value |
| 187 | BitField<0, 16, u32> pitch_high; | 189 | BitField<0, 16, u32> pitch_high; |
| 188 | BitField<26, 1, u32> use_header_opt_control; | 190 | BitField<26, 1, u32> use_header_opt_control; |
| 189 | BitField<27, 1, u32> depth_texture; | 191 | BitField<27, 1, u32> depth_texture; |
| 190 | BitField<28, 4, u32> max_mip_level; | 192 | BitField<28, 4, u32> max_mip_level; |
| 191 | 193 | ||
| 192 | BitField<0, 16, u32> buffer_high_width_minus_one; | 194 | BitField<0, 16, u32> buffer_high_width_minus_one; |
| 193 | }; | 195 | }; |
| 194 | union { | 196 | union { |
| 195 | BitField<0, 16, u32> width_minus_1; | 197 | BitField<0, 16, u32> width_minus_one; |
| 196 | BitField<22, 1, u32> srgb_conversion; | 198 | BitField<16, 3, u32> layer_base_0_2; |
| 197 | BitField<23, 4, TextureType> texture_type; | 199 | BitField<22, 1, u32> srgb_conversion; |
| 198 | BitField<29, 3, u32> border_size; | 200 | BitField<23, 4, TextureType> texture_type; |
| 201 | BitField<29, 3, u32> border_size; | ||
| 199 | 202 | ||
| 200 | BitField<0, 16, u32> buffer_low_width_minus_one; | 203 | BitField<0, 16, u32> buffer_low_width_minus_one; |
| 201 | }; | 204 | }; |
| 202 | union { | 205 | union { |
| 203 | BitField<0, 16, u32> height_minus_1; | 206 | BitField<0, 16, u32> height_minus_1; |
| 204 | BitField<16, 14, u32> depth_minus_1; | 207 | BitField<16, 14, u32> depth_minus_1; |
| 205 | }; | 208 | BitField<30, 1, u32> is_sparse; |
| 206 | union { | 209 | BitField<31, 1, u32> normalized_coords; |
| 207 | BitField<6, 13, u32> mip_lod_bias; | 210 | }; |
| 208 | BitField<27, 3, u32> max_anisotropy; | 211 | union { |
| 212 | BitField<6, 13, u32> mip_lod_bias; | ||
| 213 | BitField<27, 3, u32> max_anisotropy; | ||
| 214 | }; | ||
| 215 | union { | ||
| 216 | BitField<0, 4, u32> res_min_mip_level; | ||
| 217 | BitField<4, 4, u32> res_max_mip_level; | ||
| 218 | BitField<8, 4, MsaaMode> msaa_mode; | ||
| 219 | BitField<12, 12, u32> min_lod_clamp; | ||
| 220 | }; | ||
| 221 | }; | ||
| 222 | std::array<u64, 4> raw; | ||
| 209 | }; | 223 | }; |
| 210 | 224 | ||
| 211 | union { | 225 | constexpr bool operator==(const TICEntry& rhs) const noexcept { |
| 212 | BitField<0, 4, u32> res_min_mip_level; | 226 | return raw == rhs.raw; |
| 213 | BitField<4, 4, u32> res_max_mip_level; | 227 | } |
| 214 | BitField<8, 4, MsaaMode> msaa_mode; | ||
| 215 | BitField<12, 12, u32> min_lod_clamp; | ||
| 216 | }; | ||
| 217 | 228 | ||
| 218 | GPUVAddr Address() const { | 229 | constexpr bool operator!=(const TICEntry& rhs) const noexcept { |
| 230 | return raw != rhs.raw; | ||
| 231 | } | ||
| 232 | |||
| 233 | constexpr GPUVAddr Address() const { | ||
| 219 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); | 234 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); |
| 220 | } | 235 | } |
| 221 | 236 | ||
| 222 | u32 Pitch() const { | 237 | constexpr u32 Pitch() const { |
| 223 | ASSERT(header_version == TICHeaderVersion::Pitch || | 238 | ASSERT(header_version == TICHeaderVersion::Pitch || |
| 224 | header_version == TICHeaderVersion::PitchColorKey); | 239 | header_version == TICHeaderVersion::PitchColorKey); |
| 225 | // The pitch value is 21 bits, and is 32B aligned. | 240 | // The pitch value is 21 bits, and is 32B aligned. |
| 226 | return pitch_high << 5; | 241 | return pitch_high << 5; |
| 227 | } | 242 | } |
| 228 | 243 | ||
| 229 | u32 Width() const { | 244 | constexpr u32 Width() const { |
| 230 | if (header_version != TICHeaderVersion::OneDBuffer) { | 245 | if (header_version != TICHeaderVersion::OneDBuffer) { |
| 231 | return width_minus_1 + 1; | 246 | return width_minus_one + 1; |
| 232 | } | 247 | } |
| 233 | return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1; | 248 | return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1; |
| 234 | } | 249 | } |
| 235 | 250 | ||
| 236 | u32 Height() const { | 251 | constexpr u32 Height() const { |
| 237 | return height_minus_1 + 1; | 252 | return height_minus_1 + 1; |
| 238 | } | 253 | } |
| 239 | 254 | ||
| 240 | u32 Depth() const { | 255 | constexpr u32 Depth() const { |
| 241 | return depth_minus_1 + 1; | 256 | return depth_minus_1 + 1; |
| 242 | } | 257 | } |
| 243 | 258 | ||
| 244 | u32 BlockWidth() const { | 259 | constexpr u32 BaseLayer() const { |
| 245 | ASSERT(IsTiled()); | 260 | return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8; |
| 246 | return block_width; | ||
| 247 | } | ||
| 248 | |||
| 249 | u32 BlockHeight() const { | ||
| 250 | ASSERT(IsTiled()); | ||
| 251 | return block_height; | ||
| 252 | } | ||
| 253 | |||
| 254 | u32 BlockDepth() const { | ||
| 255 | ASSERT(IsTiled()); | ||
| 256 | return block_depth; | ||
| 257 | } | 261 | } |
| 258 | 262 | ||
| 259 | bool IsTiled() const { | 263 | constexpr bool IsBlockLinear() const { |
| 260 | return header_version == TICHeaderVersion::BlockLinear || | 264 | return header_version == TICHeaderVersion::BlockLinear || |
| 261 | header_version == TICHeaderVersion::BlockLinearColorKey; | 265 | header_version == TICHeaderVersion::BlockLinearColorKey; |
| 262 | } | 266 | } |
| 263 | 267 | ||
| 264 | bool IsLineal() const { | 268 | constexpr bool IsPitchLinear() const { |
| 265 | return header_version == TICHeaderVersion::Pitch || | 269 | return header_version == TICHeaderVersion::Pitch || |
| 266 | header_version == TICHeaderVersion::PitchColorKey; | 270 | header_version == TICHeaderVersion::PitchColorKey; |
| 267 | } | 271 | } |
| 268 | 272 | ||
| 269 | bool IsBuffer() const { | 273 | constexpr bool IsBuffer() const { |
| 270 | return header_version == TICHeaderVersion::OneDBuffer; | 274 | return header_version == TICHeaderVersion::OneDBuffer; |
| 271 | } | 275 | } |
| 272 | |||
| 273 | bool IsSrgbConversionEnabled() const { | ||
| 274 | return srgb_conversion != 0; | ||
| 275 | } | ||
| 276 | }; | 276 | }; |
| 277 | static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); | 277 | static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); |
| 278 | 278 | ||
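Editor's note: as a worked check of the bit splicing in BaseLayer() above, the 11-bit base layer is stored in three slices matching the layer_base_0_2, layer_base_3_7, and layer_base_8_10 BitFields. A standalone sketch with arbitrary sample values:

#include <cstdint>

// Recombines the three stored slices into the 11-bit base layer, mirroring
// TICEntry::BaseLayer() in the hunk above.
constexpr std::uint32_t MakeBaseLayer(std::uint32_t bits_0_2, std::uint32_t bits_3_7,
                                      std::uint32_t bits_8_10) {
    return bits_0_2 | bits_3_7 << 3 | bits_8_10 << 8;
}
static_assert(MakeBaseLayer(0b101, 0b10011, 0b010) == 0b010'10011'101);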
| @@ -309,6 +309,12 @@ enum class TextureMipmapFilter : u32 { | |||
| 309 | Linear = 3, | 309 | Linear = 3, |
| 310 | }; | 310 | }; |
| 311 | 311 | ||
| 312 | enum class SamplerReduction : u32 { | ||
| 313 | WeightedAverage = 0, | ||
| 314 | Min = 1, | ||
| 315 | Max = 2, | ||
| 316 | }; | ||
| 317 | |||
| 312 | enum class Anisotropy { | 318 | enum class Anisotropy { |
| 313 | Default, | 319 | Default, |
| 314 | Filter2x, | 320 | Filter2x, |
| @@ -333,8 +339,12 @@ struct TSCEntry { | |||
| 333 | BitField<0, 2, TextureFilter> mag_filter; | 339 | BitField<0, 2, TextureFilter> mag_filter; |
| 334 | BitField<4, 2, TextureFilter> min_filter; | 340 | BitField<4, 2, TextureFilter> min_filter; |
| 335 | BitField<6, 2, TextureMipmapFilter> mipmap_filter; | 341 | BitField<6, 2, TextureMipmapFilter> mipmap_filter; |
| 342 | BitField<8, 1, u32> cubemap_anisotropy; | ||
| 336 | BitField<9, 1, u32> cubemap_interface_filtering; | 343 | BitField<9, 1, u32> cubemap_interface_filtering; |
| 344 | BitField<10, 2, SamplerReduction> reduction_filter; | ||
| 337 | BitField<12, 13, u32> mip_lod_bias; | 345 | BitField<12, 13, u32> mip_lod_bias; |
| 346 | BitField<25, 1, u32> float_coord_normalization; | ||
| 347 | BitField<26, 5, u32> trilin_opt; | ||
| 338 | }; | 348 | }; |
| 339 | union { | 349 | union { |
| 340 | BitField<0, 12, u32> min_lod_clamp; | 350 | BitField<0, 12, u32> min_lod_clamp; |
| @@ -347,32 +357,45 @@ struct TSCEntry { | |||
| 347 | }; | 357 | }; |
| 348 | std::array<f32, 4> border_color; | 358 | std::array<f32, 4> border_color; |
| 349 | }; | 359 | }; |
| 350 | std::array<u8, 0x20> raw; | 360 | std::array<u64, 4> raw; |
| 351 | }; | 361 | }; |
| 352 | 362 | ||
| 353 | std::array<float, 4> GetBorderColor() const noexcept; | 363 | constexpr bool operator==(const TSCEntry& rhs) const noexcept { |
| 364 | return raw == rhs.raw; | ||
| 365 | } | ||
| 366 | |||
| 367 | constexpr bool operator!=(const TSCEntry& rhs) const noexcept { | ||
| 368 | return raw != rhs.raw; | ||
| 369 | } | ||
| 370 | |||
| 371 | std::array<float, 4> BorderColor() const noexcept; | ||
| 354 | 372 | ||
| 355 | float GetMaxAnisotropy() const noexcept; | 373 | float MaxAnisotropy() const noexcept; |
| 356 | 374 | ||
| 357 | float GetMinLod() const { | 375 | float MinLod() const { |
| 358 | return static_cast<float>(min_lod_clamp) / 256.0f; | 376 | return static_cast<float>(min_lod_clamp) / 256.0f; |
| 359 | } | 377 | } |
| 360 | 378 | ||
| 361 | float GetMaxLod() const { | 379 | float MaxLod() const { |
| 362 | return static_cast<float>(max_lod_clamp) / 256.0f; | 380 | return static_cast<float>(max_lod_clamp) / 256.0f; |
| 363 | } | 381 | } |
| 364 | 382 | ||
| 365 | float GetLodBias() const { | 383 | float LodBias() const { |
| 366 | // Sign extend the 13-bit value. | 384 | // Sign extend the 13-bit value. |
| 367 | constexpr u32 mask = 1U << (13 - 1); | 385 | static constexpr u32 mask = 1U << (13 - 1); |
| 368 | return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; | 386 | return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; |
| 369 | } | 387 | } |
| 370 | }; | 388 | }; |
| 371 | static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); | 389 | static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); |
| 372 | 390 | ||
| 373 | struct FullTextureInfo { | 391 | } // namespace Tegra::Texture |
| 374 | TICEntry tic; | 392 | |
| 375 | TSCEntry tsc; | 393 | template <> |
| 394 | struct std::hash<Tegra::Texture::TICEntry> { | ||
| 395 | size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept; | ||
| 376 | }; | 396 | }; |
| 377 | 397 | ||
| 378 | } // namespace Tegra::Texture | 398 | template <> |
| 399 | struct std::hash<Tegra::Texture::TSCEntry> { | ||
| 400 | size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept; | ||
| 401 | }; | ||
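Editor's note: the renamed LodBias() above keeps the (x ^ mask) - mask idiom for sign-extending the 13-bit two's-complement bias before the division by 256.0f converts it out of fixed point (8 fractional bits). A standalone restatement of the trick, with static_asserts as a sanity check:

#include <cstdint>

// Sign-extends a 13-bit two's-complement field, as LodBias() does.
constexpr std::int32_t SignExtend13(std::uint32_t value) {
    constexpr std::uint32_t mask = 1U << 12; // sign bit of the 13-bit field
    return static_cast<std::int32_t>((value ^ mask) - mask);
}
static_assert(SignExtend13(0x1FFF) == -1);    // all ones -> -1
static_assert(SignExtend13(0x1000) == -4096); // sign bit only -> minimum
static_assert(SignExtend13(0x0FFF) == 4095);  // max positive value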