Diffstat (limited to 'src')
328 files changed, 13038 insertions, 10463 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a22b564d6..61adbef28 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -45,10 +45,15 @@ if (MSVC)
 
         # Warnings
         /W3
+        /we4062 # enumerator 'identifier' in a switch of enum 'enumeration' is not handled
+        /we4101 # 'identifier': unreferenced local variable
+        /we4265 # 'class': class has virtual functions, but destructor is not virtual
+        /we4388 # signed/unsigned mismatch
         /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect
         /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'?
         /we4555 # Expression has no effect; expected expression with side-effect
         /we4834 # Discarding return value of function with 'nodiscard' attribute
+        /we5038 # data member 'member1' will be initialized after data member 'member2'
     )
 
     # /GS- - No stack buffer overflow checks
@@ -62,6 +67,7 @@ else()
         -Werror=implicit-fallthrough
         -Werror=missing-declarations
         -Werror=reorder
+        -Werror=uninitialized
         -Werror=unused-result
         -Wextra
         -Wmissing-declarations
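Each of the newly promoted MSVC warnings is a bug-finder rather than a style nit. As a sketch of the first one, /we4062 turns a switch that silently ignores an enumerator into a build error (hypothetical Kind enum, not from this tree):

enum class Kind { A, B };

int Classify(Kind k) {
    switch (k) {
    case Kind::A:
        return 0;
    } // error C4062: enumerator 'Kind::B' in switch of enum 'Kind' is not handled
    return -1;
}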
diff --git a/src/audio_core/algorithm/interpolate.cpp b/src/audio_core/algorithm/interpolate.cpp
index 699fcb84c..3b4144e21 100644
--- a/src/audio_core/algorithm/interpolate.cpp
+++ b/src/audio_core/algorithm/interpolate.cpp
@@ -218,7 +218,7 @@ void Resample(s32* output, const s32* input, s32 pitch, s32& fraction, std::size
     const auto l2 = lut[lut_index + 2];
     const auto l3 = lut[lut_index + 3];
 
-    const auto s0 = static_cast<s32>(input[index]);
+    const auto s0 = static_cast<s32>(input[index + 0]);
     const auto s1 = static_cast<s32>(input[index + 1]);
     const auto s2 = static_cast<s32>(input[index + 2]);
     const auto s3 = static_cast<s32>(input[index + 3]);
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 179560cd7..d2ce8c814 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -11,7 +11,6 @@
 #include "audio_core/info_updater.h"
 #include "audio_core/voice_context.h"
 #include "common/logging/log.h"
-#include "core/hle/kernel/writable_event.h"
 #include "core/memory.h"
 #include "core/settings.h"
 
@@ -71,10 +70,9 @@ namespace {
 namespace AudioCore {
 AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_,
                              AudioCommon::AudioRendererParameter params,
-                             std::shared_ptr<Kernel::WritableEvent> buffer_event_,
+                             Stream::ReleaseCallback&& release_callback,
                              std::size_t instance_number)
-    : worker_params{params}, buffer_event{buffer_event_},
-      memory_pool_info(params.effect_count + params.voice_count * 4),
+    : worker_params{params}, memory_pool_info(params.effect_count + params.voice_count * 4),
       voice_context(params.voice_count), effect_context(params.effect_count), mix_context(),
       sink_context(params.sink_count), splitter_context(),
       voices(params.voice_count), memory{memory_},
@@ -85,10 +83,9 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
                                 params.num_splitter_send_channels);
     mix_context.Initialize(behavior_info, params.submix_count + 1, params.effect_count);
     audio_out = std::make_unique<AudioCore::AudioOut>();
-    stream =
-        audio_out->OpenStream(core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS,
-                              fmt::format("AudioRenderer-Instance{}", instance_number),
-                              [=]() { buffer_event_->Signal(); });
+    stream = audio_out->OpenStream(
+        core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS,
+        fmt::format("AudioRenderer-Instance{}", instance_number), std::move(release_callback));
     audio_out->StartStream(stream);
 
     QueueMixedBuffer(0);
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 90f7eafa4..18567f618 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -27,10 +27,6 @@ namespace Core::Timing {
 class CoreTiming;
 }
 
-namespace Kernel {
-class WritableEvent;
-}
-
 namespace Core::Memory {
 class Memory;
 }
@@ -44,8 +40,7 @@ class AudioRenderer {
 public:
     AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_,
                   AudioCommon::AudioRendererParameter params,
-                  std::shared_ptr<Kernel::WritableEvent> buffer_event_,
-                  std::size_t instance_number);
+                  Stream::ReleaseCallback&& release_callback, std::size_t instance_number);
     ~AudioRenderer();
 
     [[nodiscard]] ResultCode UpdateAudioRenderer(const std::vector<u8>& input_params,
@@ -61,7 +56,6 @@ private:
     BehaviorInfo behavior_info{};
 
     AudioCommon::AudioRendererParameter worker_params;
-    std::shared_ptr<Kernel::WritableEvent> buffer_event;
     std::vector<ServerMemoryPoolInfo> memory_pool_info;
     VoiceContext voice_context;
     EffectContext effect_context;
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index eca296589..afe68c9ed 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -130,7 +130,11 @@ bool Stream::ContainsBuffer([[maybe_unused]] Buffer::Tag tag) const {
 std::vector<Buffer::Tag> Stream::GetTagsAndReleaseBuffers(std::size_t max_count) {
     std::vector<Buffer::Tag> tags;
     for (std::size_t count = 0; count < max_count && !released_buffers.empty(); ++count) {
-        tags.push_back(released_buffers.front()->GetTag());
+        if (released_buffers.front()) {
+            tags.push_back(released_buffers.front()->GetTag());
+        } else {
+            ASSERT_MSG(false, "Invalid tag in released_buffers!");
+        }
         released_buffers.pop();
     }
     return tags;
@@ -140,7 +144,11 @@ std::vector<Buffer::Tag> Stream::GetTagsAndReleaseBuffers() {
     std::vector<Buffer::Tag> tags;
     tags.reserve(released_buffers.size());
     while (!released_buffers.empty()) {
-        tags.push_back(released_buffers.front()->GetTag());
+        if (released_buffers.front()) {
+            tags.push_back(released_buffers.front()->GetTag());
+        } else {
+            ASSERT_MSG(false, "Invalid tag in released_buffers!");
+        }
         released_buffers.pop();
     }
     return tags;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 943ff996e..2c2bd2ee8 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -135,8 +135,6 @@ add_library(common STATIC
     math_util.h
     memory_detect.cpp
    memory_detect.h
-    memory_hook.cpp
-    memory_hook.h
     microprofile.cpp
     microprofile.h
     microprofileui.h
@@ -162,6 +160,8 @@ add_library(common STATIC
     thread.cpp
     thread.h
     thread_queue_list.h
+    thread_worker.cpp
+    thread_worker.h
     threadsafe_queue.h
     time_zone.cpp
     time_zone.h
diff --git a/src/common/concepts.h b/src/common/concepts.h
index 5bef3ad67..aa08065a7 100644
--- a/src/common/concepts.h
+++ b/src/common/concepts.h
@@ -31,4 +31,8 @@ concept DerivedFrom = requires {
     std::is_convertible_v<const volatile Derived*, const volatile Base*>;
 };
 
+// TODO: Replace with std::convertible_to when libc++ implements it.
+template <typename From, typename To>
+concept ConvertibleTo = std::is_convertible_v<From, To>;
+
 } // namespace Common
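Common::ConvertibleTo mirrors std::convertible_to closely enough for the compound requirements that use it (see k_priority_queue.h later in this diff): in a `{ expr } -> Concept<To>` requirement, the expression's type is passed as the concept's first template argument. A minimal sketch, with a hypothetical Gettable concept:

static_assert(Common::ConvertibleTo<int, unsigned long long>);

template <typename T>
concept Gettable = requires(T& t) {
    { t.GetPriority() } -> Common::ConvertibleTo<int>;
};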
diff --git a/src/common/div_ceil.h b/src/common/div_ceil.h
index 6b2c48f91..95e1489a9 100644
--- a/src/common/div_ceil.h
+++ b/src/common/div_ceil.h
@@ -11,16 +11,16 @@ namespace Common {
 
 /// Ceiled integer division.
 template <typename N, typename D>
-requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr auto DivCeil(
-    N number, D divisor) {
-    return (static_cast<D>(number) + divisor - 1) / divisor;
+requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr N DivCeil(N number,
+                                                                                        D divisor) {
+    return static_cast<N>((static_cast<D>(number) + divisor - 1) / divisor);
 }
 
 /// Ceiled integer division with logarithmic divisor in base 2
 template <typename N, typename D>
-requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr auto DivCeilLog2(
+requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr N DivCeilLog2(
     N value, D alignment_log2) {
-    return (static_cast<D>(value) + (D(1) << alignment_log2) - 1) >> alignment_log2;
+    return static_cast<N>((static_cast<D>(value) + (D(1) << alignment_log2) - 1) >> alignment_log2);
 }
 
 } // namespace Common
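The substantive change here is the declared return type: `auto` previously deduced the type of the D-typed division, so calling DivCeil with a 64-bit numerator and a u32 divisor handed back a 32-bit result; returning N keeps the numerator's width at the call site. A self-contained restatement for illustration (the real definition lives in common/div_ceil.h):

#include <cstdint>
#include <type_traits>

template <typename N, typename D>
requires std::is_integral_v<N> && std::is_unsigned_v<D>
[[nodiscard]] constexpr N DivCeil(N number, D divisor) {
    return static_cast<N>((static_cast<D>(number) + divisor - 1) / divisor);
}

static_assert(DivCeil(std::uint64_t{4096}, std::uint32_t{1000}) == 5);
static_assert(std::is_same_v<decltype(DivCeil(std::uint64_t{1}, std::uint32_t{1})),
                             std::uint64_t>);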
diff --git a/src/common/memory_hook.cpp b/src/common/memory_hook.cpp
deleted file mode 100644
index 3986986d6..000000000
--- a/src/common/memory_hook.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright 2018 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/memory_hook.h"
-
-namespace Common {
-
-MemoryHook::~MemoryHook() = default;
-
-} // namespace Common
diff --git a/src/common/memory_hook.h b/src/common/memory_hook.h
deleted file mode 100644
index adaa4c2c5..000000000
--- a/src/common/memory_hook.h
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright 2016 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <optional>
-
-#include "common/common_types.h"
-
-namespace Common {
-
-/**
- * Memory hooks have two purposes:
- * 1. To allow reads and writes to a region of memory to be intercepted. This is used to implement
- *    texture forwarding and memory breakpoints for debugging.
- * 2. To allow for the implementation of MMIO devices.
- *
- * A hook may be mapped to multiple regions of memory.
- *
- * If a std::nullopt or false is returned from a function, the read/write request is passed through
- * to the underlying memory region.
- */
-class MemoryHook {
-public:
-    virtual ~MemoryHook();
-
-    virtual std::optional<bool> IsValidAddress(VAddr addr) = 0;
-
-    virtual std::optional<u8> Read8(VAddr addr) = 0;
-    virtual std::optional<u16> Read16(VAddr addr) = 0;
-    virtual std::optional<u32> Read32(VAddr addr) = 0;
-    virtual std::optional<u64> Read64(VAddr addr) = 0;
-
-    virtual bool ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) = 0;
-
-    virtual bool Write8(VAddr addr, u8 data) = 0;
-    virtual bool Write16(VAddr addr, u16 data) = 0;
-    virtual bool Write32(VAddr addr, u32 data) = 0;
-    virtual bool Write64(VAddr addr, u64 data) = 0;
-
-    virtual bool WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size) = 0;
-};
-
-using MemoryHookPointer = std::shared_ptr<MemoryHook>;
-} // namespace Common
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
index bccea0894..8fd8620fd 100644
--- a/src/common/page_table.cpp
+++ b/src/common/page_table.cpp
@@ -10,16 +10,10 @@ PageTable::PageTable() = default;
 
 PageTable::~PageTable() noexcept = default;
 
-void PageTable::Resize(std::size_t address_space_width_in_bits, std::size_t page_size_in_bits,
-                       bool has_attribute) {
-    const std::size_t num_page_table_entries{1ULL
-                                             << (address_space_width_in_bits - page_size_in_bits)};
+void PageTable::Resize(size_t address_space_width_in_bits, size_t page_size_in_bits) {
+    const size_t num_page_table_entries{1ULL << (address_space_width_in_bits - page_size_in_bits)};
     pointers.resize(num_page_table_entries);
     backing_addr.resize(num_page_table_entries);
-
-    if (has_attribute) {
-        attributes.resize(num_page_table_entries);
-    }
 }
 
 } // namespace Common
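For a sense of scale, a worked instance of the sizing math in Resize (the concrete widths are illustrative and not part of this diff): a 39-bit address space with 12-bit (4 KiB) pages needs

constexpr std::size_t entries = std::size_t{1} << (39 - 12); // 134'217'728 slots
constexpr std::size_t bytes = entries * sizeof(void*);       // 1 GiB per table on 64-bit hosts

which is why `pointers` and `backing_addr` live in lazily committed VirtualBuffers rather than ordinary vectors.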
diff --git a/src/common/page_table.h b/src/common/page_table.h
index 9754fabf9..61c5552e0 100644
--- a/src/common/page_table.h
+++ b/src/common/page_table.h
@@ -4,10 +4,10 @@
 
 #pragma once
 
+#include <atomic>
 #include <tuple>
 
 #include "common/common_types.h"
-#include "common/memory_hook.h"
 #include "common/virtual_buffer.h"
 
 namespace Common {
@@ -20,27 +20,6 @@ enum class PageType : u8 {
     /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
     /// invalidation
     RasterizerCachedMemory,
-    /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
-    Special,
-    /// Page is allocated for use.
-    Allocated,
-};
-
-struct SpecialRegion {
-    enum class Type {
-        DebugHook,
-        IODevice,
-    } type;
-
-    MemoryHookPointer handler;
-
-    [[nodiscard]] bool operator<(const SpecialRegion& other) const {
-        return std::tie(type, handler) < std::tie(other.type, other.handler);
-    }
-
-    [[nodiscard]] bool operator==(const SpecialRegion& other) const {
-        return std::tie(type, handler) == std::tie(other.type, other.handler);
-    }
 };
 
 /**
@@ -48,6 +27,59 @@ struct SpecialRegion {
 * mimics the way a real CPU page table works.
 */
 struct PageTable {
+    /// Number of bits reserved for attribute tagging.
+    /// This can be at most the guaranteed alignment of the pointers in the page table.
+    static constexpr int ATTRIBUTE_BITS = 2;
+
+    /**
+     * Pair of host pointer and page type attribute.
+     * This uses the lower bits of a given pointer to store the attribute tag.
+     * Writing and reading the pointer attribute pair is guaranteed to be atomic for the same method
+     * call. In other words, they are guaranteed to be synchronized at all times.
+     */
+    class PageInfo {
+    public:
+        /// Returns the page pointer
+        [[nodiscard]] u8* Pointer() const noexcept {
+            return ExtractPointer(raw.load(std::memory_order_relaxed));
+        }
+
+        /// Returns the page type attribute
+        [[nodiscard]] PageType Type() const noexcept {
+            return ExtractType(raw.load(std::memory_order_relaxed));
+        }
+
+        /// Returns the page pointer and attribute pair, extracted from the same atomic read
+        [[nodiscard]] std::pair<u8*, PageType> PointerType() const noexcept {
+            const uintptr_t non_atomic_raw = raw.load(std::memory_order_relaxed);
+            return {ExtractPointer(non_atomic_raw), ExtractType(non_atomic_raw)};
+        }
+
+        /// Returns the raw representation of the page information.
+        /// Use ExtractPointer and ExtractType to unpack the value.
+        [[nodiscard]] uintptr_t Raw() const noexcept {
+            return raw.load(std::memory_order_relaxed);
+        }
+
+        /// Write a page pointer and type pair atomically
+        void Store(u8* pointer, PageType type) noexcept {
+            raw.store(reinterpret_cast<uintptr_t>(pointer) | static_cast<uintptr_t>(type));
+        }
+
+        /// Unpack a pointer from a page info raw representation
+        [[nodiscard]] static u8* ExtractPointer(uintptr_t raw) noexcept {
+            return reinterpret_cast<u8*>(raw & (~uintptr_t{0} << ATTRIBUTE_BITS));
+        }
+
+        /// Unpack a page type from a page info raw representation
+        [[nodiscard]] static PageType ExtractType(uintptr_t raw) noexcept {
+            return static_cast<PageType>(raw & ((uintptr_t{1} << ATTRIBUTE_BITS) - 1));
+        }
+
+    private:
+        std::atomic<uintptr_t> raw;
+    };
+
     PageTable();
     ~PageTable() noexcept;
 
@@ -58,25 +90,21 @@ struct PageTable {
     PageTable& operator=(PageTable&&) noexcept = default;
 
     /**
-     * Resizes the page table to be able to accomodate enough pages within
+     * Resizes the page table to be able to accommodate enough pages within
     * a given address space.
     *
     * @param address_space_width_in_bits The address size width in bits.
     * @param page_size_in_bits The page size in bits.
-     * @param has_attribute Whether or not this page has any backing attributes.
     */
-    void Resize(std::size_t address_space_width_in_bits, std::size_t page_size_in_bits,
-                bool has_attribute);
+    void Resize(size_t address_space_width_in_bits, size_t page_size_in_bits);
 
     /**
     * Vector of memory pointers backing each page. An entry can only be non-null if the
-     * corresponding entry in the `attributes` vector is of type `Memory`.
+     * corresponding attribute element is of type `Memory`.
     */
-    VirtualBuffer<u8*> pointers;
+    VirtualBuffer<PageInfo> pointers;
 
     VirtualBuffer<u64> backing_addr;
-
-    VirtualBuffer<PageType> attributes;
 };
 
 } // namespace Common
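PageInfo is ordinary pointer tagging: host page pointers are at least 4-byte aligned, so the two low bits are free to carry the PageType, and a single relaxed atomic load recovers both halves consistently. A stripped-down sketch of the same packing (PageType simplified; names otherwise mirror the diff):

#include <atomic>
#include <cstdint>
#include <utility>

enum class PageType : std::uint8_t { Unmapped, Memory, RasterizerCachedMemory };
constexpr int ATTRIBUTE_BITS = 2;

std::atomic<std::uintptr_t> raw{0};

void Store(std::uint8_t* pointer, PageType type) {
    raw.store(reinterpret_cast<std::uintptr_t>(pointer) | static_cast<std::uintptr_t>(type));
}

std::pair<std::uint8_t*, PageType> Load() {
    const std::uintptr_t v = raw.load(std::memory_order_relaxed);
    return {reinterpret_cast<std::uint8_t*>(v & (~std::uintptr_t{0} << ATTRIBUTE_BITS)),
            static_cast<PageType>(v & ((std::uintptr_t{1} << ATTRIBUTE_BITS) - 1))};
}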
diff --git a/src/common/swap.h b/src/common/swap.h
index 7665942a2..a80e191dc 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -394,7 +394,7 @@ public:
     template <typename S, typename T2, typename F2>
     friend S operator%(const S& p, const swapped_t v);
 
-    // Arithmetics + assignements
+    // Arithmetics + assignments
     template <typename S, typename T2, typename F2>
     friend S operator+=(const S& p, const swapped_t v);
 
@@ -451,7 +451,7 @@ S operator%(const S& i, const swap_struct_t<T, F> v) {
     return i % v.swap();
 }
 
-// Arithmetics + assignements
+// Arithmetics + assignments
 template <typename S, typename T, typename F>
 S& operator+=(S& i, const swap_struct_t<T, F> v) {
     i += v.swap();
diff --git a/src/common/thread_worker.cpp b/src/common/thread_worker.cpp
new file mode 100644
index 000000000..8f9bf447a
--- /dev/null
+++ b/src/common/thread_worker.cpp
@@ -0,0 +1,58 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/thread.h"
+#include "common/thread_worker.h"
+
+namespace Common {
+
+ThreadWorker::ThreadWorker(std::size_t num_workers, const std::string& name) {
+    for (std::size_t i = 0; i < num_workers; ++i)
+        threads.emplace_back([this, thread_name{std::string{name}}] {
+            Common::SetCurrentThreadName(thread_name.c_str());
+
+            // Wait for first request
+            {
+                std::unique_lock lock{queue_mutex};
+                condition.wait(lock, [this] { return stop || !requests.empty(); });
+            }
+
+            while (true) {
+                std::function<void()> task;
+
+                {
+                    std::unique_lock lock{queue_mutex};
+                    condition.wait(lock, [this] { return stop || !requests.empty(); });
+                    if (stop || requests.empty()) {
+                        return;
+                    }
+                    task = std::move(requests.front());
+                    requests.pop();
+                }
+
+                task();
+            }
+        });
+}
+
+ThreadWorker::~ThreadWorker() {
+    {
+        std::unique_lock lock{queue_mutex};
+        stop = true;
+    }
+    condition.notify_all();
+    for (std::thread& thread : threads) {
+        thread.join();
+    }
+}
+
+void ThreadWorker::QueueWork(std::function<void()>&& work) {
+    {
+        std::unique_lock lock{queue_mutex};
+        requests.emplace(work);
+    }
+    condition.notify_one();
+}
+
+} // namespace Common
diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h
new file mode 100644
index 000000000..f1859971f
--- /dev/null
+++ b/src/common/thread_worker.h
@@ -0,0 +1,30 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <functional>
+#include <mutex>
+#include <string>
+#include <vector>
+#include <queue>
+
+namespace Common {
+
+class ThreadWorker final {
+public:
+    explicit ThreadWorker(std::size_t num_workers, const std::string& name);
+    ~ThreadWorker();
+    void QueueWork(std::function<void()>&& work);
+
+private:
+    std::vector<std::thread> threads;
+    std::queue<std::function<void()>> requests;
+    std::mutex queue_mutex;
+    std::condition_variable condition;
+    std::atomic_bool stop{};
+};
+
+} // namespace Common
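A minimal usage sketch for the new pool, with the API exactly as declared above (the task body is a placeholder):

#include "common/thread_worker.h"

int main() {
    Common::ThreadWorker workers(2, "yuzu:Example");
    workers.QueueWork([] { /* expensive, fire-and-forget task */ });
    // ~ThreadWorker sets stop under the lock, notifies all workers, and joins them.
}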
diff --git a/src/common/virtual_buffer.h b/src/common/virtual_buffer.h
index 91d430036..fb1a6f81f 100644
--- a/src/common/virtual_buffer.h
+++ b/src/common/virtual_buffer.h
@@ -15,10 +15,12 @@ void FreeMemoryPages(void* base, std::size_t size) noexcept;
 template <typename T>
 class VirtualBuffer final {
 public:
-    static_assert(
-        std::is_trivially_constructible_v<T>,
-        "T must be trivially constructible, as non-trivial constructors will not be executed "
-        "with the current allocator");
+    // TODO: Uncomment this and change Common::PageTable::PageInfo to be trivially constructible
+    // using std::atomic_ref once libc++ has support for it
+    // static_assert(
+    //     std::is_trivially_constructible_v<T>,
+    //     "T must be trivially constructible, as non-trivial constructors will not be executed "
+    //     "with the current allocator");
 
     constexpr VirtualBuffer() = default;
     explicit VirtualBuffer(std::size_t count) : alloc_size{count * sizeof(T)} {
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 59bd3d2a6..893df433a 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -202,6 +202,8 @@ add_library(core STATIC
     hle/kernel/server_port.h
     hle/kernel/server_session.cpp
     hle/kernel/server_session.h
+    hle/kernel/service_thread.cpp
+    hle/kernel/service_thread.h
     hle/kernel/session.cpp
     hle/kernel/session.h
     hle/kernel/shared_memory.cpp
@@ -500,7 +502,6 @@ add_library(core STATIC
     hle/service/sm/controller.h
     hle/service/sm/sm.cpp
     hle/service/sm/sm.h
-    hle/service/sockets/blocking_worker.h
     hle/service/sockets/bsd.cpp
     hle/service/sockets/bsd.h
     hle/service/sockets/ethc.cpp
@@ -634,6 +635,8 @@ if (MSVC)
         /we4267
         # 'context' : truncation from 'type1' to 'type2'
         /we4305
+        # 'function' : not all control paths return a value
+        /we4715
     )
 else()
     target_compile_options(core PRIVATE
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index e9c74b1a6..6c4c8e9e4 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -71,15 +71,8 @@ public:
     }
 
     void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
-        switch (exception) {
-        case Dynarmic::A32::Exception::UndefinedInstruction:
-        case Dynarmic::A32::Exception::UnpredictableInstruction:
-            break;
-        case Dynarmic::A32::Exception::Breakpoint:
-            break;
-        }
         LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
-                     static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
+                     exception, pc, MemoryReadCode(pc));
         UNIMPLEMENTED();
     }
 
@@ -133,6 +126,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
     config.page_table = reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>(
         page_table.pointers.data());
     config.absolute_offset_page_table = true;
+    config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
     config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
     config.only_detect_misalignment_via_page_table_on_page_boundary = true;
 
@@ -180,6 +174,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
         if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
             config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
         }
+        if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
+            config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
+        }
     }
 
     return std::make_unique<Dynarmic::A32::Jit>(config);
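config.page_table_pointer_mask_bits is the glue between the tagged PageTable entries above and the JIT: it tells dynarmic how many low bits to clear from an entry before dereferencing it. Conceptually the masking looks like this (a sketch of the idea, not dynarmic's actual code; `entry` is the raw value of one PageInfo):

auto* host_ptr = reinterpret_cast<std::uint8_t*>(
    entry & (~std::uintptr_t{0} << Common::PageTable::ATTRIBUTE_BITS));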
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 7a4eb88a2..4c5ebca22 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -152,6 +152,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
     // Memory
     config.page_table = reinterpret_cast<void**>(page_table.pointers.data());
     config.page_table_address_space_bits = address_space_bits;
+    config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
     config.silently_mirror_page_table = false;
     config.absolute_offset_page_table = true;
     config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
@@ -211,6 +212,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
         if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
             config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
         }
+        if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
+            config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
+        }
     }
 
     return std::make_shared<Dynarmic::A64::Jit>(config);
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 0961c0819..1a2002dec 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -159,7 +159,7 @@ struct System::Impl {
         device_memory = std::make_unique<Core::DeviceMemory>();
 
         is_multicore = Settings::values.use_multi_core.GetValue();
-        is_async_gpu = is_multicore || Settings::values.use_asynchronous_gpu_emulation.GetValue();
+        is_async_gpu = Settings::values.use_asynchronous_gpu_emulation.GetValue();
 
         kernel.SetMulticore(is_multicore);
         cpu_manager.SetMulticore(is_multicore);
@@ -307,7 +307,6 @@ struct System::Impl {
         service_manager.reset();
         cheat_engine.reset();
         telemetry_session.reset();
-        device_memory.reset();
 
         // Close all CPU/threading state
         cpu_manager.Shutdown();
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp
index da15f764a..cebe2ce37 100644
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -143,6 +143,7 @@ u64 GetSignatureTypeDataSize(SignatureType type) {
         return 0x3C;
     }
     UNREACHABLE();
+    return 0;
 }
 
 u64 GetSignatureTypePaddingSize(SignatureType type) {
@@ -157,6 +158,7 @@ u64 GetSignatureTypePaddingSize(SignatureType type) {
         return 0x40;
     }
     UNREACHABLE();
+    return 0;
 }
 
 SignatureType Ticket::GetSignatureType() const {
@@ -169,8 +171,7 @@ SignatureType Ticket::GetSignatureType() const {
     if (const auto* ticket = std::get_if<ECDSATicket>(&data)) {
         return ticket->sig_type;
     }
-
-    UNREACHABLE();
+    throw std::bad_variant_access{};
 }
 
 TicketData& Ticket::GetData() {
@@ -183,8 +184,7 @@ TicketData& Ticket::GetData() {
     if (auto* ticket = std::get_if<ECDSATicket>(&data)) {
         return ticket->data;
     }
-
-    UNREACHABLE();
+    throw std::bad_variant_access{};
 }
 
 const TicketData& Ticket::GetData() const {
@@ -197,8 +197,7 @@ const TicketData& Ticket::GetData() const {
     if (const auto* ticket = std::get_if<ECDSATicket>(&data)) {
         return ticket->data;
    }
-
-    UNREACHABLE();
+    throw std::bad_variant_access{};
 }
 
 u64 Ticket::GetSize() const {
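The dead `return 0;` lines pair with the new /we4715 error enabled in core/CMakeLists.txt above: MSVC does not treat UNREACHABLE() as [[noreturn]] in every configuration, so every control path still needs a value. The shape of the fix, with hypothetical case labels (UNREACHABLE stands in for the project macro):

u64 GetSignatureTypeDataSize(SignatureType type) {
    switch (type) {
    case SignatureType::ECDSA_SHA256:
        return 0x3C;
    // ... remaining enumerators ...
    }
    UNREACHABLE();
    return 0; // never executed, but satisfies C4715
}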
diff --git a/src/core/file_sys/nca_patch.cpp b/src/core/file_sys/nca_patch.cpp
index adcf0732f..a65ec6798 100644
--- a/src/core/file_sys/nca_patch.cpp
+++ b/src/core/file_sys/nca_patch.cpp
@@ -51,8 +51,8 @@ std::pair<std::size_t, std::size_t> SearchBucketEntry(u64 offset, const BlockTyp
             low = mid + 1;
         }
     }
-
     UNREACHABLE_MSG("Offset could not be found in BKTR block.");
+    return {0, 0};
 }
 } // Anonymous namespace
 
diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp
index da01002d5..431302f55 100644
--- a/src/core/file_sys/registered_cache.cpp
+++ b/src/core/file_sys/registered_cache.cpp
@@ -105,7 +105,8 @@ ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
         // TODO(DarkLordZach): Peek at NCA contents to differentiate Manual and Legal.
         return ContentRecordType::HtmlDocument;
     default:
-        UNREACHABLE_MSG("Invalid NCAContentType={:02X}", static_cast<u8>(type));
+        UNREACHABLE_MSG("Invalid NCAContentType={:02X}", type);
+        return ContentRecordType{};
     }
 }
 
diff --git a/src/core/file_sys/registered_cache.h b/src/core/file_sys/registered_cache.h
index 5b414b0f0..b08a1687a 100644
--- a/src/core/file_sys/registered_cache.h
+++ b/src/core/file_sys/registered_cache.h
@@ -67,18 +67,18 @@ public:
     virtual void Refresh() = 0;
 
     virtual bool HasEntry(u64 title_id, ContentRecordType type) const = 0;
-    virtual bool HasEntry(ContentProviderEntry entry) const;
+    bool HasEntry(ContentProviderEntry entry) const;
 
     virtual std::optional<u32> GetEntryVersion(u64 title_id) const = 0;
 
     virtual VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const = 0;
-    virtual VirtualFile GetEntryUnparsed(ContentProviderEntry entry) const;
+    VirtualFile GetEntryUnparsed(ContentProviderEntry entry) const;
 
     virtual VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const = 0;
-    virtual VirtualFile GetEntryRaw(ContentProviderEntry entry) const;
+    VirtualFile GetEntryRaw(ContentProviderEntry entry) const;
 
     virtual std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const = 0;
-    virtual std::unique_ptr<NCA> GetEntry(ContentProviderEntry entry) const;
+    std::unique_ptr<NCA> GetEntry(ContentProviderEntry entry) const;
 
     virtual std::vector<ContentProviderEntry> ListEntries() const;
 
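Dropping `virtual` from the ContentProviderEntry overloads works because they are pure conveniences: each can forward to the pure-virtual (title_id, type) overload once in the base class, presumably along these lines (member names assumed):

bool ContentProvider::HasEntry(ContentProviderEntry entry) const {
    return HasEntry(entry.title_id, entry.type);
}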
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index e75e80ad0..83decf6cf 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -46,43 +46,6 @@ void SessionRequestHandler::ClientDisconnected(
     boost::range::remove_erase(connected_sessions, server_session);
 }
 
-std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
-    const std::string& reason, u64 timeout, WakeupCallback&& callback,
-    std::shared_ptr<WritableEvent> writable_event) {
-    // Put the client thread to sleep until the wait event is signaled or the timeout expires.
-
-    if (!writable_event) {
-        // Create event if not provided
-        const auto pair = WritableEvent::CreateEventPair(kernel, "HLE Pause Event: " + reason);
-        writable_event = pair.writable;
-    }
-
-    Handle event_handle = InvalidHandle;
-    {
-        KScopedSchedulerLockAndSleep lock(kernel, event_handle, thread.get(), timeout);
-        thread->SetHLECallback(
-            [context = *this, callback](std::shared_ptr<Thread> thread) mutable -> bool {
-                ThreadWakeupReason reason = thread->GetSignalingResult() == RESULT_TIMEOUT
-                                                ? ThreadWakeupReason::Timeout
-                                                : ThreadWakeupReason::Signal;
-                callback(thread, context, reason);
-                context.WriteToOutgoingCommandBuffer(*thread);
-                return true;
-            });
-        const auto readable_event{writable_event->GetReadableEvent()};
-        writable_event->Clear();
-        thread->SetHLESyncObject(readable_event.get());
-        thread->SetStatus(ThreadStatus::WaitHLEEvent);
-        thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
-        readable_event->AddWaitingThread(thread);
-    }
-    thread->SetHLETimeEvent(event_handle);
-
-    is_thread_waiting = true;
-
-    return writable_event;
-}
-
 HLERequestContext::HLERequestContext(KernelCore& kernel, Core::Memory::Memory& memory,
                                      std::shared_ptr<ServerSession> server_session,
                                      std::shared_ptr<Thread> thread)
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index c31a65476..b112e1ebd 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -129,23 +129,6 @@ public:
     using WakeupCallback = std::function<void(
         std::shared_ptr<Thread> thread, HLERequestContext& context, ThreadWakeupReason reason)>;
 
-    /**
-     * Puts the specified guest thread to sleep until the returned event is signaled or until the
-     * specified timeout expires.
-     * @param reason Reason for pausing the thread, to be used for debugging purposes.
-     * @param timeout Timeout in nanoseconds after which the thread will be awoken and the callback
-     * invoked with a Timeout reason.
-     * @param callback Callback to be invoked when the thread is resumed. This callback must write
-     * the entire command response once again, regardless of the state of it before this function
-     * was called.
-     * @param writable_event Event to use to wake up the thread. If unspecified, an event will be
-     * created.
-     * @returns Event that when signaled will resume the thread and call the callback function.
-     */
-    std::shared_ptr<WritableEvent> SleepClientThread(
-        const std::string& reason, u64 timeout, WakeupCallback&& callback,
-        std::shared_ptr<WritableEvent> writable_event = nullptr);
-
     /// Populates this context with data from the requesting process/thread.
     ResultCode PopulateFromIncomingCommandBuffer(const HandleTable& handle_table,
                                                  u32_le* src_cmdbuf);
diff --git a/src/core/hle/kernel/k_priority_queue.h b/src/core/hle/kernel/k_priority_queue.h
index 01a577d0c..99fb8fe93 100644
--- a/src/core/hle/kernel/k_priority_queue.h
+++ b/src/core/hle/kernel/k_priority_queue.h
@@ -8,11 +8,13 @@
 #pragma once
 
 #include <array>
+#include <concepts>
 
 #include "common/assert.h"
 #include "common/bit_set.h"
 #include "common/bit_util.h"
 #include "common/common_types.h"
+#include "common/concepts.h"
 
 namespace Kernel {
 
@@ -21,7 +23,7 @@ class Thread;
 template <typename T>
 concept KPriorityQueueAffinityMask = !std::is_reference_v<T> && requires(T & t) {
     { t.GetAffinityMask() }
-    ->std::convertible_to<u64>;
+    ->Common::ConvertibleTo<u64>;
     {t.SetAffinityMask(std::declval<u64>())};
 
     { t.GetAffinity(std::declval<int32_t>()) }
@@ -48,9 +50,9 @@ concept KPriorityQueueMember = !std::is_reference_v<T> && requires(T & t) {
     ->KPriorityQueueAffinityMask;
 
     { t.GetActiveCore() }
-    ->std::convertible_to<s32>;
+    ->Common::ConvertibleTo<s32>;
     { t.GetPriority() }
-    ->std::convertible_to<s32>;
+    ->Common::ConvertibleTo<s32>;
 };
 
 template <typename Member, size_t _NumCores, int LowestPriority, int HighestPriority>
diff --git a/src/core/hle/kernel/k_scheduler_lock.h b/src/core/hle/kernel/k_scheduler_lock.h
index 2d675b39e..2f1c1f691 100644
--- a/src/core/hle/kernel/k_scheduler_lock.h
+++ b/src/core/hle/kernel/k_scheduler_lock.h
@@ -10,6 +10,7 @@
 #include "common/assert.h"
 #include "common/spin_lock.h"
 #include "core/hardware_properties.h"
+#include "core/hle/kernel/kernel.h"
 
 namespace Kernel {
 
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 04cae3a43..e8ece8164 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp | |||
| @@ -8,13 +8,14 @@ | |||
| 8 | #include <functional> | 8 | #include <functional> |
| 9 | #include <memory> | 9 | #include <memory> |
| 10 | #include <thread> | 10 | #include <thread> |
| 11 | #include <unordered_map> | 11 | #include <unordered_set> |
| 12 | #include <utility> | 12 | #include <utility> |
| 13 | 13 | ||
| 14 | #include "common/assert.h" | 14 | #include "common/assert.h" |
| 15 | #include "common/logging/log.h" | 15 | #include "common/logging/log.h" |
| 16 | #include "common/microprofile.h" | 16 | #include "common/microprofile.h" |
| 17 | #include "common/thread.h" | 17 | #include "common/thread.h" |
| 18 | #include "common/thread_worker.h" | ||
| 18 | #include "core/arm/arm_interface.h" | 19 | #include "core/arm/arm_interface.h" |
| 19 | #include "core/arm/cpu_interrupt_handler.h" | 20 | #include "core/arm/cpu_interrupt_handler.h" |
| 20 | #include "core/arm/exclusive_monitor.h" | 21 | #include "core/arm/exclusive_monitor.h" |
| @@ -35,6 +36,7 @@ | |||
| 35 | #include "core/hle/kernel/physical_core.h" | 36 | #include "core/hle/kernel/physical_core.h" |
| 36 | #include "core/hle/kernel/process.h" | 37 | #include "core/hle/kernel/process.h" |
| 37 | #include "core/hle/kernel/resource_limit.h" | 38 | #include "core/hle/kernel/resource_limit.h" |
| 39 | #include "core/hle/kernel/service_thread.h" | ||
| 38 | #include "core/hle/kernel/shared_memory.h" | 40 | #include "core/hle/kernel/shared_memory.h" |
| 39 | #include "core/hle/kernel/synchronization.h" | 41 | #include "core/hle/kernel/synchronization.h" |
| 40 | #include "core/hle/kernel/thread.h" | 42 | #include "core/hle/kernel/thread.h" |
| @@ -60,6 +62,8 @@ struct KernelCore::Impl { | |||
| 60 | RegisterHostThread(); | 62 | RegisterHostThread(); |
| 61 | 63 | ||
| 62 | global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel); | 64 | global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel); |
| 65 | service_thread_manager = | ||
| 66 | std::make_unique<Common::ThreadWorker>(1, "yuzu:ServiceThreadManager"); | ||
| 63 | 67 | ||
| 64 | InitializePhysicalCores(); | 68 | InitializePhysicalCores(); |
| 65 | InitializeSystemResourceLimit(kernel); | 69 | InitializeSystemResourceLimit(kernel); |
| @@ -76,6 +80,12 @@ struct KernelCore::Impl { | |||
| 76 | } | 80 | } |
| 77 | 81 | ||
| 78 | void Shutdown() { | 82 | void Shutdown() { |
| 83 | process_list.clear(); | ||
| 84 | |||
| 85 | // Ensures all service threads gracefully shutdown | ||
| 86 | service_thread_manager.reset(); | ||
| 87 | service_threads.clear(); | ||
| 88 | |||
| 79 | next_object_id = 0; | 89 | next_object_id = 0; |
| 80 | next_kernel_process_id = Process::InitialKIPIDMin; | 90 | next_kernel_process_id = Process::InitialKIPIDMin; |
| 81 | next_user_process_id = Process::ProcessIDMin; | 91 | next_user_process_id = Process::ProcessIDMin; |
| @@ -89,8 +99,6 @@ struct KernelCore::Impl { | |||
| 89 | 99 | ||
| 90 | cores.clear(); | 100 | cores.clear(); |
| 91 | 101 | ||
| 92 | process_list.clear(); | ||
| 93 | |||
| 94 | current_process = nullptr; | 102 | current_process = nullptr; |
| 95 | 103 | ||
| 96 | system_resource_limit = nullptr; | 104 | system_resource_limit = nullptr; |
| @@ -103,10 +111,8 @@ struct KernelCore::Impl { | |||
| 103 | 111 | ||
| 104 | exclusive_monitor.reset(); | 112 | exclusive_monitor.reset(); |
| 105 | 113 | ||
| 106 | num_host_threads = 0; | 114 | // Next host thead ID to use, 0-3 IDs represent core threads, >3 represent others |
| 107 | std::fill(register_host_thread_keys.begin(), register_host_thread_keys.end(), | 115 | next_host_thread_id = Core::Hardware::NUM_CPU_CORES; |
| 108 | std::thread::id{}); | ||
| 109 | std::fill(register_host_thread_values.begin(), register_host_thread_values.end(), 0); | ||
| 110 | } | 116 | } |
| 111 | 117 | ||
| 112 | void InitializePhysicalCores() { | 118 | void InitializePhysicalCores() { |
| @@ -186,52 +192,46 @@ struct KernelCore::Impl { | |||
| 186 | } | 192 | } |
| 187 | } | 193 | } |
| 188 | 194 | ||
| 195 | /// Creates a new host thread ID, should only be called by GetHostThreadId | ||
| 196 | u32 AllocateHostThreadId(std::optional<std::size_t> core_id) { | ||
| 197 | if (core_id) { | ||
| 198 | // The first for slots are reserved for CPU core threads | ||
| 199 | ASSERT(*core_id < Core::Hardware::NUM_CPU_CORES); | ||
| 200 | return static_cast<u32>(*core_id); | ||
| 201 | } else { | ||
| 202 | return next_host_thread_id++; | ||
| 203 | } | ||
| 204 | } | ||
| 205 | |||
| 206 | /// Gets the host thread ID for the caller, allocating a new one if this is the first time | ||
| 207 | u32 GetHostThreadId(std::optional<std::size_t> core_id = std::nullopt) { | ||
| 208 | const thread_local auto host_thread_id{AllocateHostThreadId(core_id)}; | ||
| 209 | return host_thread_id; | ||
| 210 | } | ||
| 211 | |||
| 212 | /// Registers a CPU core thread by allocating a host thread ID for it | ||
| 189 | void RegisterCoreThread(std::size_t core_id) { | 213 | void RegisterCoreThread(std::size_t core_id) { |
| 190 | const std::thread::id this_id = std::this_thread::get_id(); | 214 | ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); |
| 215 | const auto this_id = GetHostThreadId(core_id); | ||
| 191 | if (!is_multicore) { | 216 | if (!is_multicore) { |
| 192 | single_core_thread_id = this_id; | 217 | single_core_thread_id = this_id; |
| 193 | } | 218 | } |
| 194 | const auto end = | ||
| 195 | register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads); | ||
| 196 | const auto it = std::find(register_host_thread_keys.begin(), end, this_id); | ||
| 197 | ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); | ||
| 198 | ASSERT(it == end); | ||
| 199 | InsertHostThread(static_cast<u32>(core_id)); | ||
| 200 | } | 219 | } |
| 201 | 220 | ||
| 221 | /// Registers a new host thread by allocating a host thread ID for it | ||
| 202 | void RegisterHostThread() { | 222 | void RegisterHostThread() { |
| 203 | const std::thread::id this_id = std::this_thread::get_id(); | 223 | [[maybe_unused]] const auto this_id = GetHostThreadId(); |
| 204 | const auto end = | ||
| 205 | register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads); | ||
| 206 | const auto it = std::find(register_host_thread_keys.begin(), end, this_id); | ||
| 207 | if (it == end) { | ||
| 208 | InsertHostThread(registered_thread_ids++); | ||
| 209 | } | ||
| 210 | } | ||
| 211 | |||
| 212 | void InsertHostThread(u32 value) { | ||
| 213 | const size_t index = num_host_threads++; | ||
| 214 | ASSERT_MSG(index < NUM_REGISTRABLE_HOST_THREADS, "Too many host threads"); | ||
| 215 | register_host_thread_values[index] = value; | ||
| 216 | register_host_thread_keys[index] = std::this_thread::get_id(); | ||
| 217 | } | 224 | } |
| 218 | 225 | ||
| 219 | [[nodiscard]] u32 GetCurrentHostThreadID() const { | 226 | [[nodiscard]] u32 GetCurrentHostThreadID() { |
| 220 | const std::thread::id this_id = std::this_thread::get_id(); | 227 | const auto this_id = GetHostThreadId(); |
| 221 | if (!is_multicore && single_core_thread_id == this_id) { | 228 | if (!is_multicore && single_core_thread_id == this_id) { |
| 222 | return static_cast<u32>(system.GetCpuManager().CurrentCore()); | 229 | return static_cast<u32>(system.GetCpuManager().CurrentCore()); |
| 223 | } | 230 | } |
| 224 | const auto end = | 231 | return this_id; |
| 225 | register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads); | ||
| 226 | const auto it = std::find(register_host_thread_keys.begin(), end, this_id); | ||
| 227 | if (it == end) { | ||
| 228 | return Core::INVALID_HOST_THREAD_ID; | ||
| 229 | } | ||
| 230 | return register_host_thread_values[static_cast<size_t>( | ||
| 231 | std::distance(register_host_thread_keys.begin(), it))]; | ||
| 232 | } | 232 | } |
| 233 | 233 | ||
| 234 | Core::EmuThreadHandle GetCurrentEmuThreadID() const { | 234 | [[nodiscard]] Core::EmuThreadHandle GetCurrentEmuThreadID() { |
| 235 | Core::EmuThreadHandle result = Core::EmuThreadHandle::InvalidHandle(); | 235 | Core::EmuThreadHandle result = Core::EmuThreadHandle::InvalidHandle(); |
| 236 | result.host_handle = GetCurrentHostThreadID(); | 236 | result.host_handle = GetCurrentHostThreadID(); |
| 237 | if (result.host_handle >= Core::Hardware::NUM_CPU_CORES) { | 237 | if (result.host_handle >= Core::Hardware::NUM_CPU_CORES) { |
| @@ -325,15 +325,8 @@ struct KernelCore::Impl { | |||
| 325 | std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor; | 325 | std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor; |
| 326 | std::vector<Kernel::PhysicalCore> cores; | 326 | std::vector<Kernel::PhysicalCore> cores; |
| 327 | 327 | ||
| 328 | // 0-3 IDs represent core threads, >3 represent others | 328 | // Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others |
| 329 | std::atomic<u32> registered_thread_ids{Core::Hardware::NUM_CPU_CORES}; | 329 | std::atomic<u32> next_host_thread_id{Core::Hardware::NUM_CPU_CORES}; |
| 330 | |||
| 331 | // Number of host threads is a relatively high number to avoid overflowing | ||
| 332 | static constexpr size_t NUM_REGISTRABLE_HOST_THREADS = 64; | ||
| 333 | std::atomic<size_t> num_host_threads{0}; | ||
| 334 | std::array<std::atomic<std::thread::id>, NUM_REGISTRABLE_HOST_THREADS> | ||
| 335 | register_host_thread_keys{}; | ||
| 336 | std::array<std::atomic<u32>, NUM_REGISTRABLE_HOST_THREADS> register_host_thread_values{}; | ||
| 337 | 330 | ||
| 338 | // Kernel memory management | 331 | // Kernel memory management |
| 339 | std::unique_ptr<Memory::MemoryManager> memory_manager; | 332 | std::unique_ptr<Memory::MemoryManager> memory_manager; |
| @@ -345,12 +338,19 @@ struct KernelCore::Impl { | |||
| 345 | std::shared_ptr<Kernel::SharedMemory> irs_shared_mem; | 338 | std::shared_ptr<Kernel::SharedMemory> irs_shared_mem; |
| 346 | std::shared_ptr<Kernel::SharedMemory> time_shared_mem; | 339 | std::shared_ptr<Kernel::SharedMemory> time_shared_mem; |
| 347 | 340 | ||
| 341 | // Threads used for services | ||
| 342 | std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads; | ||
| 343 | |||
| 344 | // Service threads are managed by a worker thread, so that a calling service thread can queue up | ||
| 345 | // the release of itself | ||
| 346 | std::unique_ptr<Common::ThreadWorker> service_thread_manager; | ||
| 347 | |||
| 348 | std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{}; | 348 | std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{}; |
| 349 | std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{}; | 349 | std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{}; |
| 350 | std::array<std::unique_ptr<Kernel::KScheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{}; | 350 | std::array<std::unique_ptr<Kernel::KScheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{}; |
| 351 | 351 | ||
| 352 | bool is_multicore{}; | 352 | bool is_multicore{}; |
| 353 | std::thread::id single_core_thread_id{}; | 353 | u32 single_core_thread_id{}; |
| 354 | 354 | ||
| 355 | std::array<u64, Core::Hardware::NUM_CPU_CORES> svc_ticks{}; | 355 | std::array<u64, Core::Hardware::NUM_CPU_CORES> svc_ticks{}; |
| 356 | 356 | ||
| @@ -639,4 +639,19 @@ void KernelCore::ExitSVCProfile() { | |||
| 639 | MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]); | 639 | MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]); |
| 640 | } | 640 | } |
| 641 | 641 | ||
| 642 | std::weak_ptr<Kernel::ServiceThread> KernelCore::CreateServiceThread(const std::string& name) { | ||
| 643 | auto service_thread = std::make_shared<Kernel::ServiceThread>(*this, 1, name); | ||
| 644 | impl->service_thread_manager->QueueWork( | ||
| 645 | [this, service_thread] { impl->service_threads.emplace(service_thread); }); | ||
| 646 | return service_thread; | ||
| 647 | } | ||
| 648 | |||
| 649 | void KernelCore::ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread) { | ||
| 650 | impl->service_thread_manager->QueueWork([this, service_thread] { | ||
| 651 | if (auto strong_ptr = service_thread.lock()) { | ||
| 652 | impl->service_threads.erase(strong_ptr); | ||
| 653 | } | ||
| 654 | }); | ||
| 655 | } | ||
| 656 | |||
| 642 | } // namespace Kernel | 657 | } // namespace Kernel |
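The kernel.cpp hunks above replace the fixed 64-entry array of (std::thread::id, id) pairs, and the linear search over it, with a per-thread cache: the first call on any host thread allocates an ID (core threads take 0-3, everything else increments an atomic counter), and every later call just re-reads the cached value. A minimal standalone sketch of the same pattern, with hypothetical names (the NUM_CPU_CORES constant, AllocateId, GetId) standing in for the Impl members:

    #include <atomic>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <optional>

    constexpr std::size_t NUM_CPU_CORES = 4; // assumption: mirrors Core::Hardware::NUM_CPU_CORES

    std::atomic<std::uint32_t> next_host_thread_id{NUM_CPU_CORES};

    // IDs 0-3 are reserved for the CPU core threads; all other host threads
    // draw from the shared atomic counter.
    std::uint32_t AllocateId(std::optional<std::size_t> core_id) {
        if (core_id) {
            assert(*core_id < NUM_CPU_CORES);
            return static_cast<std::uint32_t>(*core_id);
        }
        return next_host_thread_id++;
    }

    // The thread_local initializer runs once per host thread, so the ID is
    // allocated on first use and only re-read afterwards; core_id is ignored
    // on every call after the first, exactly as in the diff.
    std::uint32_t GetId(std::optional<std::size_t> core_id = std::nullopt) {
        thread_local const std::uint32_t id = AllocateId(core_id);
        return id;
    }

Besides dropping the NUM_REGISTRABLE_HOST_THREADS cap, this removes the std::find over registered thread IDs from every GetCurrentHostThreadID call.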
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 5846c3f39..e3169f5a7 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h | |||
| @@ -42,6 +42,7 @@ class Process; | |||
| 42 | class ResourceLimit; | 42 | class ResourceLimit; |
| 43 | class KScheduler; | 43 | class KScheduler; |
| 44 | class SharedMemory; | 44 | class SharedMemory; |
| 45 | class ServiceThread; | ||
| 45 | class Synchronization; | 46 | class Synchronization; |
| 46 | class Thread; | 47 | class Thread; |
| 47 | class TimeManager; | 48 | class TimeManager; |
| @@ -227,6 +228,22 @@ public: | |||
| 227 | 228 | ||
| 228 | void ExitSVCProfile(); | 229 | void ExitSVCProfile(); |
| 229 | 230 | ||
| 231 | /** | ||
| 232 | * Creates an HLE service thread, which is used to execute service routines asynchronously. | ||
| 233 | * While these are allocated per ServerSession, they need to be owned and managed outside of | ||
| 234 | * ServerSession to avoid a circular dependency. | ||
| 235 | * @param name String name for the ServerSession creating this thread, used for debug purposes. | ||
| 236 | * @returns A weak pointer to the newly created service thread. | ||
| 237 | */ | ||
| 238 | std::weak_ptr<Kernel::ServiceThread> CreateServiceThread(const std::string& name); | ||
| 239 | |||
| 240 | /** | ||
| 241 | * Releases an HLE service thread, instructing KernelCore to free it. This should be called when | ||
| 242 | * the ServerSession associated with the thread is destroyed. | ||
| 243 | * @param service_thread Service thread to release. | ||
| 244 | */ | ||
| 245 | void ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread); | ||
| 246 | |||
| 230 | private: | 247 | private: |
| 231 | friend class Object; | 248 | friend class Object; |
| 232 | friend class Process; | 249 | friend class Process; |
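The doc comments above are the whole ownership contract: KernelCore's set of shared_ptrs is the only owner, each ServerSession keeps a weak_ptr, and release goes back through KernelCore so no reference cycle forms. A condensed sketch of that shape, using a hypothetical Registry/Worker pair in place of KernelCore and ServiceThread (the real code additionally funnels Create/Release through a worker thread so a service thread can queue its own release):

    #include <memory>
    #include <unordered_set>

    struct Worker {}; // stand-in for Kernel::ServiceThread

    class Registry { // stand-in for the owning side of KernelCore
    public:
        // Hand out weak_ptrs only; the shared_ptr stays in the set.
        std::weak_ptr<Worker> Create() {
            auto worker = std::make_shared<Worker>();
            workers.insert(worker);
            return worker;
        }

        // Erasing the last shared_ptr destroys the worker.
        void Release(const std::weak_ptr<Worker>& worker) {
            if (auto strong = worker.lock()) {
                workers.erase(strong);
            }
        }

    private:
        std::unordered_set<std::shared_ptr<Worker>> workers;
    };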
diff --git a/src/core/hle/kernel/memory/address_space_info.cpp b/src/core/hle/kernel/memory/address_space_info.cpp index e4288cab4..6cf43ba24 100644 --- a/src/core/hle/kernel/memory/address_space_info.cpp +++ b/src/core/hle/kernel/memory/address_space_info.cpp | |||
| @@ -96,6 +96,7 @@ u64 AddressSpaceInfo::GetAddressSpaceStart(std::size_t width, Type type) { | |||
| 96 | return AddressSpaceInfos[AddressSpaceIndices39Bit[index]].address; | 96 | return AddressSpaceInfos[AddressSpaceIndices39Bit[index]].address; |
| 97 | } | 97 | } |
| 98 | UNREACHABLE(); | 98 | UNREACHABLE(); |
| 99 | return 0; | ||
| 99 | } | 100 | } |
| 100 | 101 | ||
| 101 | std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type) { | 102 | std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type) { |
| @@ -112,6 +113,7 @@ std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type) | |||
| 112 | return AddressSpaceInfos[AddressSpaceIndices39Bit[index]].size; | 113 | return AddressSpaceInfos[AddressSpaceIndices39Bit[index]].size; |
| 113 | } | 114 | } |
| 114 | UNREACHABLE(); | 115 | UNREACHABLE(); |
| 116 | return 0; | ||
| 115 | } | 117 | } |
| 116 | 118 | ||
| 117 | } // namespace Kernel::Memory | 119 | } // namespace Kernel::Memory |
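The two `return 0;` lines exist purely to satisfy the warnings promoted to errors in this change (-Werror=uninitialized and friends): UNREACHABLE() tells the reader the path is impossible, but the compiler still sees a value-returning function that can fall off its end. A minimal sketch of the pattern, with a hypothetical Width() helper and a simplified stand-in for the real macro:

    #include <cstdint>
    #include <cstdlib>

    #define UNREACHABLE() std::abort() // assumption: the real macro asserts/logs

    enum class Type { Is32Bit, Is39Bit };

    std::uint64_t Width(Type type) {
        switch (type) {
        case Type::Is32Bit:
            return 32;
        case Type::Is39Bit:
            return 39;
        }
        UNREACHABLE();
        return 0; // never taken, but keeps -Werror builds happy
    }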
diff --git a/src/core/hle/kernel/memory/memory_block.h b/src/core/hle/kernel/memory/memory_block.h index 37fe19916..83acece1e 100644 --- a/src/core/hle/kernel/memory/memory_block.h +++ b/src/core/hle/kernel/memory/memory_block.h | |||
| @@ -73,12 +73,12 @@ enum class MemoryState : u32 { | |||
| 73 | ThreadLocal = | 73 | ThreadLocal = |
| 74 | static_cast<u32>(Svc::MemoryState::ThreadLocal) | FlagMapped | FlagReferenceCounted, | 74 | static_cast<u32>(Svc::MemoryState::ThreadLocal) | FlagMapped | FlagReferenceCounted, |
| 75 | 75 | ||
| 76 | Transfered = static_cast<u32>(Svc::MemoryState::Transfered) | FlagsMisc | | 76 | Transferred = static_cast<u32>(Svc::MemoryState::Transferred) | FlagsMisc | |
| 77 | FlagCanAlignedDeviceMap | FlagCanChangeAttribute | FlagCanUseIpc | | 77 | FlagCanAlignedDeviceMap | FlagCanChangeAttribute | FlagCanUseIpc | |
| 78 | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, | 78 | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, |
| 79 | 79 | ||
| 80 | SharedTransfered = static_cast<u32>(Svc::MemoryState::SharedTransfered) | FlagsMisc | | 80 | SharedTransferred = static_cast<u32>(Svc::MemoryState::SharedTransferred) | FlagsMisc | |
| 81 | FlagCanAlignedDeviceMap | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, | 81 | FlagCanAlignedDeviceMap | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, |
| 82 | 82 | ||
| 83 | SharedCode = static_cast<u32>(Svc::MemoryState::SharedCode) | FlagMapped | | 83 | SharedCode = static_cast<u32>(Svc::MemoryState::SharedCode) | FlagMapped | |
| 84 | FlagReferenceCounted | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, | 84 | FlagReferenceCounted | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, |
| @@ -111,8 +111,8 @@ static_assert(static_cast<u32>(MemoryState::AliasCodeData) == 0x03FFBD09); | |||
| 111 | static_assert(static_cast<u32>(MemoryState::Ipc) == 0x005C3C0A); | 111 | static_assert(static_cast<u32>(MemoryState::Ipc) == 0x005C3C0A); |
| 112 | static_assert(static_cast<u32>(MemoryState::Stack) == 0x005C3C0B); | 112 | static_assert(static_cast<u32>(MemoryState::Stack) == 0x005C3C0B); |
| 113 | static_assert(static_cast<u32>(MemoryState::ThreadLocal) == 0x0040200C); | 113 | static_assert(static_cast<u32>(MemoryState::ThreadLocal) == 0x0040200C); |
| 114 | static_assert(static_cast<u32>(MemoryState::Transfered) == 0x015C3C0D); | 114 | static_assert(static_cast<u32>(MemoryState::Transferred) == 0x015C3C0D); |
| 115 | static_assert(static_cast<u32>(MemoryState::SharedTransfered) == 0x005C380E); | 115 | static_assert(static_cast<u32>(MemoryState::SharedTransferred) == 0x005C380E); |
| 116 | static_assert(static_cast<u32>(MemoryState::SharedCode) == 0x0040380F); | 116 | static_assert(static_cast<u32>(MemoryState::SharedCode) == 0x0040380F); |
| 117 | static_assert(static_cast<u32>(MemoryState::Inaccessible) == 0x00000010); | 117 | static_assert(static_cast<u32>(MemoryState::Inaccessible) == 0x00000010); |
| 118 | static_assert(static_cast<u32>(MemoryState::NonSecureIpc) == 0x005C3811); | 118 | static_assert(static_cast<u32>(MemoryState::NonSecureIpc) == 0x005C3811); |
diff --git a/src/core/hle/kernel/memory/page_table.cpp b/src/core/hle/kernel/memory/page_table.cpp index f53a7be82..080886554 100644 --- a/src/core/hle/kernel/memory/page_table.cpp +++ b/src/core/hle/kernel/memory/page_table.cpp | |||
| @@ -265,7 +265,7 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t | |||
| 265 | physical_memory_usage = 0; | 265 | physical_memory_usage = 0; |
| 266 | memory_pool = pool; | 266 | memory_pool = pool; |
| 267 | 267 | ||
| 268 | page_table_impl.Resize(address_space_width, PageBits, true); | 268 | page_table_impl.Resize(address_space_width, PageBits); |
| 269 | 269 | ||
| 270 | return InitializeMemoryLayout(start, end); | 270 | return InitializeMemoryLayout(start, end); |
| 271 | } | 271 | } |
| @@ -1007,8 +1007,8 @@ constexpr VAddr PageTable::GetRegionAddress(MemoryState state) const { | |||
| 1007 | case MemoryState::Shared: | 1007 | case MemoryState::Shared: |
| 1008 | case MemoryState::AliasCode: | 1008 | case MemoryState::AliasCode: |
| 1009 | case MemoryState::AliasCodeData: | 1009 | case MemoryState::AliasCodeData: |
| 1010 | case MemoryState::Transfered: | 1010 | case MemoryState::Transferred: |
| 1011 | case MemoryState::SharedTransfered: | 1011 | case MemoryState::SharedTransferred: |
| 1012 | case MemoryState::SharedCode: | 1012 | case MemoryState::SharedCode: |
| 1013 | case MemoryState::GeneratedCode: | 1013 | case MemoryState::GeneratedCode: |
| 1014 | case MemoryState::CodeOut: | 1014 | case MemoryState::CodeOut: |
| @@ -1042,8 +1042,8 @@ constexpr std::size_t PageTable::GetRegionSize(MemoryState state) const { | |||
| 1042 | case MemoryState::Shared: | 1042 | case MemoryState::Shared: |
| 1043 | case MemoryState::AliasCode: | 1043 | case MemoryState::AliasCode: |
| 1044 | case MemoryState::AliasCodeData: | 1044 | case MemoryState::AliasCodeData: |
| 1045 | case MemoryState::Transfered: | 1045 | case MemoryState::Transferred: |
| 1046 | case MemoryState::SharedTransfered: | 1046 | case MemoryState::SharedTransferred: |
| 1047 | case MemoryState::SharedCode: | 1047 | case MemoryState::SharedCode: |
| 1048 | case MemoryState::GeneratedCode: | 1048 | case MemoryState::GeneratedCode: |
| 1049 | case MemoryState::CodeOut: | 1049 | case MemoryState::CodeOut: |
| @@ -1080,8 +1080,8 @@ constexpr bool PageTable::CanContain(VAddr addr, std::size_t size, MemoryState s | |||
| 1080 | case MemoryState::AliasCodeData: | 1080 | case MemoryState::AliasCodeData: |
| 1081 | case MemoryState::Stack: | 1081 | case MemoryState::Stack: |
| 1082 | case MemoryState::ThreadLocal: | 1082 | case MemoryState::ThreadLocal: |
| 1083 | case MemoryState::Transfered: | 1083 | case MemoryState::Transferred: |
| 1084 | case MemoryState::SharedTransfered: | 1084 | case MemoryState::SharedTransferred: |
| 1085 | case MemoryState::SharedCode: | 1085 | case MemoryState::SharedCode: |
| 1086 | case MemoryState::GeneratedCode: | 1086 | case MemoryState::GeneratedCode: |
| 1087 | case MemoryState::CodeOut: | 1087 | case MemoryState::CodeOut: |
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index a35c8aa4b..b40fe3916 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp | |||
| @@ -25,19 +25,19 @@ | |||
| 25 | namespace Kernel { | 25 | namespace Kernel { |
| 26 | 26 | ||
| 27 | ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {} | 27 | ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {} |
| 28 | ServerSession::~ServerSession() = default; | 28 | |
| 29 | ServerSession::~ServerSession() { | ||
| 30 | kernel.ReleaseServiceThread(service_thread); | ||
| 31 | } | ||
| 29 | 32 | ||
| 30 | ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel, | 33 | ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel, |
| 31 | std::shared_ptr<Session> parent, | 34 | std::shared_ptr<Session> parent, |
| 32 | std::string name) { | 35 | std::string name) { |
| 33 | std::shared_ptr<ServerSession> session{std::make_shared<ServerSession>(kernel)}; | 36 | std::shared_ptr<ServerSession> session{std::make_shared<ServerSession>(kernel)}; |
| 34 | 37 | ||
| 35 | session->request_event = | ||
| 36 | Core::Timing::CreateEvent(name, [session](std::uintptr_t, std::chrono::nanoseconds) { | ||
| 37 | session->CompleteSyncRequest(); | ||
| 38 | }); | ||
| 39 | session->name = std::move(name); | 38 | session->name = std::move(name); |
| 40 | session->parent = std::move(parent); | 39 | session->parent = std::move(parent); |
| 40 | session->service_thread = kernel.CreateServiceThread(session->name); | ||
| 41 | 41 | ||
| 42 | return MakeResult(std::move(session)); | 42 | return MakeResult(std::move(session)); |
| 43 | } | 43 | } |
| @@ -142,16 +142,16 @@ ResultCode ServerSession::QueueSyncRequest(std::shared_ptr<Thread> thread, | |||
| 142 | std::make_shared<HLERequestContext>(kernel, memory, SharedFrom(this), std::move(thread)); | 142 | std::make_shared<HLERequestContext>(kernel, memory, SharedFrom(this), std::move(thread)); |
| 143 | 143 | ||
| 144 | context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf); | 144 | context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf); |
| 145 | request_queue.Push(std::move(context)); | 145 | |
| 146 | if (auto strong_ptr = service_thread.lock()) { | ||
| 147 | strong_ptr->QueueSyncRequest(*this, std::move(context)); | ||
| 148 | return RESULT_SUCCESS; | ||
| 149 | } | ||
| 146 | 150 | ||
| 147 | return RESULT_SUCCESS; | 151 | return RESULT_SUCCESS; |
| 148 | } | 152 | } |
| 149 | 153 | ||
| 150 | ResultCode ServerSession::CompleteSyncRequest() { | 154 | ResultCode ServerSession::CompleteSyncRequest(HLERequestContext& context) { |
| 151 | ASSERT(!request_queue.Empty()); | ||
| 152 | |||
| 153 | auto& context = *request_queue.Front(); | ||
| 154 | |||
| 155 | ResultCode result = RESULT_SUCCESS; | 155 | ResultCode result = RESULT_SUCCESS; |
| 156 | // If the session has been converted to a domain, handle the domain request | 156 | // If the session has been converted to a domain, handle the domain request |
| 157 | if (IsDomain() && context.HasDomainMessageHeader()) { | 157 | if (IsDomain() && context.HasDomainMessageHeader()) { |
| @@ -177,18 +177,13 @@ ResultCode ServerSession::CompleteSyncRequest() { | |||
| 177 | } | 177 | } |
| 178 | } | 178 | } |
| 179 | 179 | ||
| 180 | request_queue.Pop(); | ||
| 181 | |||
| 182 | return result; | 180 | return result; |
| 183 | } | 181 | } |
| 184 | 182 | ||
| 185 | ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread, | 183 | ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread, |
| 186 | Core::Memory::Memory& memory, | 184 | Core::Memory::Memory& memory, |
| 187 | Core::Timing::CoreTiming& core_timing) { | 185 | Core::Timing::CoreTiming& core_timing) { |
| 188 | const ResultCode result = QueueSyncRequest(std::move(thread), memory); | 186 | return QueueSyncRequest(std::move(thread), memory); |
| 189 | const auto delay = std::chrono::nanoseconds{kernel.IsMulticore() ? 0 : 20000}; | ||
| 190 | core_timing.ScheduleEvent(delay, request_event, {}); | ||
| 191 | return result; | ||
| 192 | } | 187 | } |
| 193 | 188 | ||
| 194 | } // namespace Kernel | 189 | } // namespace Kernel |
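The rewritten QueueSyncRequest shows the lock-then-dispatch idiom for a weak_ptr collaborator: promote to a temporary shared_ptr, hand the work over, and treat a failed lock as "the peer is already gone" rather than an error (note that both paths in the hunk return RESULT_SUCCESS). A generic sketch with hypothetical Dispatcher/Request types:

    #include <memory>
    #include <utility>

    struct Request {};

    struct Dispatcher {
        void Queue(Request&&) { /* hand the request to a worker */ }
    };

    // Returns true when the request was handed off; a dead dispatcher is
    // deliberately not an error, matching the diff above.
    bool TryQueue(const std::weak_ptr<Dispatcher>& dispatcher, Request&& request) {
        if (auto strong = dispatcher.lock()) {
            strong->Queue(std::move(request));
            return true;
        }
        return false;
    }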
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h index d23e9ec68..e8d1d99ea 100644 --- a/src/core/hle/kernel/server_session.h +++ b/src/core/hle/kernel/server_session.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | 11 | ||
| 12 | #include "common/threadsafe_queue.h" | 12 | #include "common/threadsafe_queue.h" |
| 13 | #include "core/hle/kernel/service_thread.h" | ||
| 13 | #include "core/hle/kernel/synchronization_object.h" | 14 | #include "core/hle/kernel/synchronization_object.h" |
| 14 | #include "core/hle/result.h" | 15 | #include "core/hle/result.h" |
| 15 | 16 | ||
| @@ -43,6 +44,8 @@ class Thread; | |||
| 43 | * TLS buffer and control is transferred back to it. | 44 | * TLS buffer and control is transferred back to it. |
| 44 | */ | 45 | */ |
| 45 | class ServerSession final : public SynchronizationObject { | 46 | class ServerSession final : public SynchronizationObject { |
| 47 | friend class ServiceThread; | ||
| 48 | |||
| 46 | public: | 49 | public: |
| 47 | explicit ServerSession(KernelCore& kernel); | 50 | explicit ServerSession(KernelCore& kernel); |
| 48 | ~ServerSession() override; | 51 | ~ServerSession() override; |
| @@ -132,7 +135,7 @@ private: | |||
| 132 | ResultCode QueueSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory); | 135 | ResultCode QueueSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory); |
| 133 | 136 | ||
| 134 | /// Completes a sync request from the emulated application. | 137 | /// Completes a sync request from the emulated application. |
| 135 | ResultCode CompleteSyncRequest(); | 138 | ResultCode CompleteSyncRequest(HLERequestContext& context); |
| 136 | 139 | ||
| 137 | /// Handles a SyncRequest to a domain, forwarding the request to the proper object or closing an | 140 | /// Handles a SyncRequest to a domain, forwarding the request to the proper object or closing an |
| 138 | /// object handle. | 141 | /// object handle. |
| @@ -163,11 +166,8 @@ private: | |||
| 163 | /// The name of this session (optional) | 166 | /// The name of this session (optional) |
| 164 | std::string name; | 167 | std::string name; |
| 165 | 168 | ||
| 166 | /// Core timing event used to schedule the service request at some point in the future | 169 | /// Thread to dispatch service requests |
| 167 | std::shared_ptr<Core::Timing::EventType> request_event; | 170 | std::weak_ptr<ServiceThread> service_thread; |
| 168 | |||
| 169 | /// Queue of scheduled service requests | ||
| 170 | Common::MPSCQueue<std::shared_ptr<Kernel::HLERequestContext>> request_queue; | ||
| 171 | }; | 171 | }; |
| 172 | 172 | ||
| 173 | } // namespace Kernel | 173 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/service_thread.cpp b/src/core/hle/kernel/service_thread.cpp new file mode 100644 index 000000000..ee46f3e21 --- /dev/null +++ b/src/core/hle/kernel/service_thread.cpp | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | // Copyright 2020 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <condition_variable> | ||
| 6 | #include <functional> | ||
| 7 | #include <mutex> | ||
| 8 | #include <thread> | ||
| 9 | #include <vector> | ||
| 10 | #include <queue> | ||
| 11 | |||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/scope_exit.h" | ||
| 14 | #include "common/thread.h" | ||
| 15 | #include "core/core.h" | ||
| 16 | #include "core/hle/kernel/kernel.h" | ||
| 17 | #include "core/hle/kernel/server_session.h" | ||
| 18 | #include "core/hle/kernel/service_thread.h" | ||
| 19 | #include "core/hle/lock.h" | ||
| 20 | #include "video_core/renderer_base.h" | ||
| 21 | |||
| 22 | namespace Kernel { | ||
| 23 | |||
| 24 | class ServiceThread::Impl final { | ||
| 25 | public: | ||
| 26 | explicit Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name); | ||
| 27 | ~Impl(); | ||
| 28 | |||
| 29 | void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context); | ||
| 30 | |||
| 31 | private: | ||
| 32 | std::vector<std::thread> threads; | ||
| 33 | std::queue<std::function<void()>> requests; | ||
| 34 | std::mutex queue_mutex; | ||
| 35 | std::condition_variable condition; | ||
| 36 | const std::string service_name; | ||
| 37 | bool stop{}; | ||
| 38 | }; | ||
| 39 | |||
| 40 | ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name) | ||
| 41 | : service_name{name} { | ||
| 42 | for (std::size_t i = 0; i < num_threads; ++i) | ||
| 43 | threads.emplace_back([this, &kernel] { | ||
| 44 | Common::SetCurrentThreadName(std::string{"yuzu:HleService:" + service_name}.c_str()); | ||
| 45 | |||
| 46 | // Wait for first request before trying to acquire a render context | ||
| 47 | { | ||
| 48 | std::unique_lock lock{queue_mutex}; | ||
| 49 | condition.wait(lock, [this] { return stop || !requests.empty(); }); | ||
| 50 | } | ||
| 51 | |||
| 52 | kernel.RegisterHostThread(); | ||
| 53 | |||
| 54 | while (true) { | ||
| 55 | std::function<void()> task; | ||
| 56 | |||
| 57 | { | ||
| 58 | std::unique_lock lock{queue_mutex}; | ||
| 59 | condition.wait(lock, [this] { return stop || !requests.empty(); }); | ||
| 60 | if (stop || requests.empty()) { | ||
| 61 | return; | ||
| 62 | } | ||
| 63 | task = std::move(requests.front()); | ||
| 64 | requests.pop(); | ||
| 65 | } | ||
| 66 | |||
| 67 | task(); | ||
| 68 | } | ||
| 69 | }); | ||
| 70 | } | ||
| 71 | |||
| 72 | void ServiceThread::Impl::QueueSyncRequest(ServerSession& session, | ||
| 73 | std::shared_ptr<HLERequestContext>&& context) { | ||
| 74 | { | ||
| 75 | std::unique_lock lock{queue_mutex}; | ||
| 76 | |||
| 77 | // ServerSession owns the service thread, so we cannot capture a strong pointer here in the | ||
| 78 | // event that the ServerSession is terminated. | ||
| 79 | std::weak_ptr<ServerSession> weak_ptr{SharedFrom(&session)}; | ||
| 80 | requests.emplace([weak_ptr, context{std::move(context)}]() { | ||
| 81 | if (auto strong_ptr = weak_ptr.lock()) { | ||
| 82 | strong_ptr->CompleteSyncRequest(*context); | ||
| 83 | } | ||
| 84 | }); | ||
| 85 | } | ||
| 86 | condition.notify_one(); | ||
| 87 | } | ||
| 88 | |||
| 89 | ServiceThread::Impl::~Impl() { | ||
| 90 | { | ||
| 91 | std::unique_lock lock{queue_mutex}; | ||
| 92 | stop = true; | ||
| 93 | } | ||
| 94 | condition.notify_all(); | ||
| 95 | for (std::thread& thread : threads) { | ||
| 96 | thread.join(); | ||
| 97 | } | ||
| 98 | } | ||
| 99 | |||
| 100 | ServiceThread::ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name) | ||
| 101 | : impl{std::make_unique<Impl>(kernel, num_threads, name)} {} | ||
| 102 | |||
| 103 | ServiceThread::~ServiceThread() = default; | ||
| 104 | |||
| 105 | void ServiceThread::QueueSyncRequest(ServerSession& session, | ||
| 106 | std::shared_ptr<HLERequestContext>&& context) { | ||
| 107 | impl->QueueSyncRequest(session, std::move(context)); | ||
| 108 | } | ||
| 109 | |||
| 110 | } // namespace Kernel | ||
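service_thread.cpp is a conventional one-queue/N-thread worker pool: tasks are pushed under queue_mutex, workers block in condition.wait with a predicate (which also guards against spurious wakeups), and shutdown sets stop under the lock, wakes everyone, and joins. A single-threaded restatement of the same pattern, assuming nothing from the emulator; like the diff, it discards tasks still queued at shutdown:

    #include <condition_variable>
    #include <functional>
    #include <mutex>
    #include <queue>
    #include <thread>

    class MiniWorker {
    public:
        // worker is declared last, so the queue, mutex, and flag are fully
        // constructed before the thread starts touching them.
        MiniWorker() : worker{[this] { Run(); }} {}

        ~MiniWorker() {
            {
                std::unique_lock lock{mutex};
                stop = true;
            }
            cv.notify_all();
            worker.join();
        }

        void Post(std::function<void()> task) {
            {
                std::unique_lock lock{mutex};
                tasks.push(std::move(task));
            }
            cv.notify_one();
        }

    private:
        void Run() {
            while (true) {
                std::function<void()> task;
                {
                    std::unique_lock lock{mutex};
                    cv.wait(lock, [this] { return stop || !tasks.empty(); });
                    if (stop) {
                        return; // pending tasks are discarded, as in the diff
                    }
                    task = std::move(tasks.front());
                    tasks.pop();
                }
                task(); // run outside the lock so Post() never blocks on us
            }
        }

        std::mutex mutex;
        std::condition_variable cv;
        std::queue<std::function<void()>> tasks;
        bool stop{};
        std::thread worker; // must stay the last member
    };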
diff --git a/src/core/hle/kernel/service_thread.h b/src/core/hle/kernel/service_thread.h new file mode 100644 index 000000000..025ab8fb5 --- /dev/null +++ b/src/core/hle/kernel/service_thread.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2020 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <string> | ||
| 9 | |||
| 10 | namespace Kernel { | ||
| 11 | |||
| 12 | class HLERequestContext; | ||
| 13 | class KernelCore; | ||
| 14 | class ServerSession; | ||
| 15 | |||
| 16 | class ServiceThread final { | ||
| 17 | public: | ||
| 18 | explicit ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name); | ||
| 19 | ~ServiceThread(); | ||
| 20 | |||
| 21 | void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context); | ||
| 22 | |||
| 23 | private: | ||
| 24 | class Impl; | ||
| 25 | std::unique_ptr<Impl> impl; | ||
| 26 | }; | ||
| 27 | |||
| 28 | } // namespace Kernel | ||
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 2d225392f..de3ed25da 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -1583,7 +1583,7 @@ static void ExitThread32(Core::System& system) { | |||
| 1583 | 1583 | ||
| 1584 | /// Sleep the current thread | 1584 | /// Sleep the current thread |
| 1585 | static void SleepThread(Core::System& system, s64 nanoseconds) { | 1585 | static void SleepThread(Core::System& system, s64 nanoseconds) { |
| 1586 | LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds); | 1586 | LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds); |
| 1587 | 1587 | ||
| 1588 | enum class SleepType : s64 { | 1588 | enum class SleepType : s64 { |
| 1589 | YieldWithoutCoreMigration = 0, | 1589 | YieldWithoutCoreMigration = 0, |
diff --git a/src/core/hle/kernel/svc_types.h b/src/core/hle/kernel/svc_types.h index 986724beb..11e1d8e2d 100644 --- a/src/core/hle/kernel/svc_types.h +++ b/src/core/hle/kernel/svc_types.h | |||
| @@ -23,8 +23,8 @@ enum class MemoryState : u32 { | |||
| 23 | Ipc = 0x0A, | 23 | Ipc = 0x0A, |
| 24 | Stack = 0x0B, | 24 | Stack = 0x0B, |
| 25 | ThreadLocal = 0x0C, | 25 | ThreadLocal = 0x0C, |
| 26 | Transfered = 0x0D, | 26 | Transferred = 0x0D, |
| 27 | SharedTransfered = 0x0E, | 27 | SharedTransferred = 0x0E, |
| 28 | SharedCode = 0x0F, | 28 | SharedCode = 0x0F, |
| 29 | Inaccessible = 0x10, | 29 | Inaccessible = 0x10, |
| 30 | NonSecureIpc = 0x11, | 30 | NonSecureIpc = 0x11, |
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index cb13210e5..c9808060a 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp | |||
| @@ -560,14 +560,14 @@ void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequest | |||
| 560 | 560 | ||
| 561 | AppletMessageQueue::AppletMessageQueue(Kernel::KernelCore& kernel) { | 561 | AppletMessageQueue::AppletMessageQueue(Kernel::KernelCore& kernel) { |
| 562 | on_new_message = | 562 | on_new_message = |
| 563 | Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OnMessageRecieved"); | 563 | Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OnMessageReceived"); |
| 564 | on_operation_mode_changed = | 564 | on_operation_mode_changed = |
| 565 | Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OperationModeChanged"); | 565 | Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OperationModeChanged"); |
| 566 | } | 566 | } |
| 567 | 567 | ||
| 568 | AppletMessageQueue::~AppletMessageQueue() = default; | 568 | AppletMessageQueue::~AppletMessageQueue() = default; |
| 569 | 569 | ||
| 570 | const std::shared_ptr<Kernel::ReadableEvent>& AppletMessageQueue::GetMesssageRecieveEvent() const { | 570 | const std::shared_ptr<Kernel::ReadableEvent>& AppletMessageQueue::GetMessageReceiveEvent() const { |
| 571 | return on_new_message.readable; | 571 | return on_new_message.readable; |
| 572 | } | 572 | } |
| 573 | 573 | ||
| @@ -675,7 +675,7 @@ void ICommonStateGetter::GetEventHandle(Kernel::HLERequestContext& ctx) { | |||
| 675 | 675 | ||
| 676 | IPC::ResponseBuilder rb{ctx, 2, 1}; | 676 | IPC::ResponseBuilder rb{ctx, 2, 1}; |
| 677 | rb.Push(RESULT_SUCCESS); | 677 | rb.Push(RESULT_SUCCESS); |
| 678 | rb.PushCopyObjects(msg_queue->GetMesssageRecieveEvent()); | 678 | rb.PushCopyObjects(msg_queue->GetMessageReceiveEvent()); |
| 679 | } | 679 | } |
| 680 | 680 | ||
| 681 | void ICommonStateGetter::ReceiveMessage(Kernel::HLERequestContext& ctx) { | 681 | void ICommonStateGetter::ReceiveMessage(Kernel::HLERequestContext& ctx) { |
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index b1da0d081..f51aca1af 100644 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h | |||
| @@ -55,7 +55,7 @@ public: | |||
| 55 | explicit AppletMessageQueue(Kernel::KernelCore& kernel); | 55 | explicit AppletMessageQueue(Kernel::KernelCore& kernel); |
| 56 | ~AppletMessageQueue(); | 56 | ~AppletMessageQueue(); |
| 57 | 57 | ||
| 58 | const std::shared_ptr<Kernel::ReadableEvent>& GetMesssageRecieveEvent() const; | 58 | const std::shared_ptr<Kernel::ReadableEvent>& GetMessageReceiveEvent() const; |
| 59 | const std::shared_ptr<Kernel::ReadableEvent>& GetOperationModeChangedEvent() const; | 59 | const std::shared_ptr<Kernel::ReadableEvent>& GetOperationModeChangedEvent() const; |
| 60 | void PushMessage(AppletMessage msg); | 60 | void PushMessage(AppletMessage msg); |
| 61 | AppletMessage PopMessage(); | 61 | AppletMessage PopMessage(); |
diff --git a/src/core/hle/service/apm/interface.cpp b/src/core/hle/service/apm/interface.cpp index 298f6d520..0bff97a37 100644 --- a/src/core/hle/service/apm/interface.cpp +++ b/src/core/hle/service/apm/interface.cpp | |||
| @@ -56,7 +56,7 @@ APM::APM(Core::System& system_, std::shared_ptr<Module> apm_, Controller& contro | |||
| 56 | static const FunctionInfo functions[] = { | 56 | static const FunctionInfo functions[] = { |
| 57 | {0, &APM::OpenSession, "OpenSession"}, | 57 | {0, &APM::OpenSession, "OpenSession"}, |
| 58 | {1, &APM::GetPerformanceMode, "GetPerformanceMode"}, | 58 | {1, &APM::GetPerformanceMode, "GetPerformanceMode"}, |
| 59 | {6, nullptr, "IsCpuOverclockEnabled"}, | 59 | {6, &APM::IsCpuOverclockEnabled, "IsCpuOverclockEnabled"}, |
| 60 | }; | 60 | }; |
| 61 | RegisterHandlers(functions); | 61 | RegisterHandlers(functions); |
| 62 | } | 62 | } |
| @@ -78,6 +78,14 @@ void APM::GetPerformanceMode(Kernel::HLERequestContext& ctx) { | |||
| 78 | rb.PushEnum(controller.GetCurrentPerformanceMode()); | 78 | rb.PushEnum(controller.GetCurrentPerformanceMode()); |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | void APM::IsCpuOverclockEnabled(Kernel::HLERequestContext& ctx) { | ||
| 82 | LOG_WARNING(Service_APM, "(STUBBED) called"); | ||
| 83 | |||
| 84 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 85 | rb.Push(RESULT_SUCCESS); | ||
| 86 | rb.Push(false); | ||
| 87 | } | ||
| 88 | |||
| 81 | APM_Sys::APM_Sys(Core::System& system_, Controller& controller_) | 89 | APM_Sys::APM_Sys(Core::System& system_, Controller& controller_) |
| 82 | : ServiceFramework{system_, "apm:sys"}, controller{controller_} { | 90 | : ServiceFramework{system_, "apm:sys"}, controller{controller_} { |
| 83 | // clang-format off | 91 | // clang-format off |
diff --git a/src/core/hle/service/apm/interface.h b/src/core/hle/service/apm/interface.h index 7d57c4978..063ad5308 100644 --- a/src/core/hle/service/apm/interface.h +++ b/src/core/hle/service/apm/interface.h | |||
| @@ -20,6 +20,7 @@ public: | |||
| 20 | private: | 20 | private: |
| 21 | void OpenSession(Kernel::HLERequestContext& ctx); | 21 | void OpenSession(Kernel::HLERequestContext& ctx); |
| 22 | void GetPerformanceMode(Kernel::HLERequestContext& ctx); | 22 | void GetPerformanceMode(Kernel::HLERequestContext& ctx); |
| 23 | void IsCpuOverclockEnabled(Kernel::HLERequestContext& ctx); | ||
| 23 | 24 | ||
| 24 | std::shared_ptr<Module> apm; | 25 | std::shared_ptr<Module> apm; |
| 25 | Controller& controller; | 26 | Controller& controller; |
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 145f47ee2..0cd797109 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp | |||
| @@ -70,8 +70,10 @@ public: | |||
| 70 | Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioOutBufferReleased"); | 70 | Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioOutBufferReleased"); |
| 71 | 71 | ||
| 72 | stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate, | 72 | stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate, |
| 73 | audio_params.channel_count, std::move(unique_name), | 73 | audio_params.channel_count, std::move(unique_name), [this] { |
| 74 | [this] { buffer_event.writable->Signal(); }); | 74 | const auto guard = LockService(); |
| 75 | buffer_event.writable->Signal(); | ||
| 76 | }); | ||
| 75 | } | 77 | } |
| 76 | 78 | ||
| 77 | private: | 79 | private: |
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 6e7b7316c..c5c22d053 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp | |||
| @@ -49,16 +49,16 @@ public: | |||
| 49 | 49 | ||
| 50 | system_event = | 50 | system_event = |
| 51 | Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioRenderer:SystemEvent"); | 51 | Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioRenderer:SystemEvent"); |
| 52 | renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), system.Memory(), | 52 | renderer = std::make_unique<AudioCore::AudioRenderer>( |
| 53 | audren_params, system_event.writable, | 53 | system.CoreTiming(), system.Memory(), audren_params, |
| 54 | instance_number); | 54 | [this]() { |
| 55 | const auto guard = LockService(); | ||
| 56 | system_event.writable->Signal(); | ||
| 57 | }, | ||
| 58 | instance_number); | ||
| 55 | } | 59 | } |
| 56 | 60 | ||
| 57 | private: | 61 | private: |
| 58 | void UpdateAudioCallback() { | ||
| 59 | system_event.writable->Signal(); | ||
| 60 | } | ||
| 61 | |||
| 62 | void GetSampleRate(Kernel::HLERequestContext& ctx) { | 62 | void GetSampleRate(Kernel::HLERequestContext& ctx) { |
| 63 | LOG_DEBUG(Service_Audio, "called"); | 63 | LOG_DEBUG(Service_Audio, "called"); |
| 64 | 64 | ||
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index b3c7234e1..8d95f74e6 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp | |||
| @@ -78,11 +78,13 @@ IAppletResource::IAppletResource(Core::System& system_) | |||
| 78 | pad_update_event = Core::Timing::CreateEvent( | 78 | pad_update_event = Core::Timing::CreateEvent( |
| 79 | "HID::UpdatePadCallback", | 79 | "HID::UpdatePadCallback", |
| 80 | [this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) { | 80 | [this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) { |
| 81 | const auto guard = LockService(); | ||
| 81 | UpdateControllers(user_data, ns_late); | 82 | UpdateControllers(user_data, ns_late); |
| 82 | }); | 83 | }); |
| 83 | motion_update_event = Core::Timing::CreateEvent( | 84 | motion_update_event = Core::Timing::CreateEvent( |
| 84 | "HID::MotionPadCallback", | 85 | "HID::MotionPadCallback", |
| 85 | [this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) { | 86 | [this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) { |
| 87 | const auto guard = LockService(); | ||
| 86 | UpdateMotion(user_data, ns_late); | 88 | UpdateMotion(user_data, ns_late); |
| 87 | }); | 89 | }); |
| 88 | 90 | ||
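The audout, audren, and hid hunks above all apply the same fix: asynchronous callbacks (audio buffer signals, pad/motion timer events) now take `const auto guard = LockService();` before touching service state, so they cannot race with IPC handlers running on the new service threads. A generic sketch of the idiom, assuming a LockService() that returns a scoped lock over a per-service mutex (the real lock lives in ServiceFramework, outside this diff):

    #include <functional>
    #include <mutex>

    class Service {
    public:
        // Every IPC handler and every async callback takes the same lock.
        [[nodiscard]] std::scoped_lock<std::mutex> LockService() {
            return std::scoped_lock{mutex}; // relies on C++17 guaranteed elision
        }

        std::function<void()> MakeSignalCallback() {
            return [this] {
                const auto guard = LockService(); // serialize with handlers
                buffer_released = true;           // hypothetical shared state
            };
        }

    private:
        std::mutex mutex;
        bool buffer_released{};
    };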
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h index 44a8bc060..5681599ba 100644 --- a/src/core/hle/service/nvdrv/devices/nvdevice.h +++ b/src/core/hle/service/nvdrv/devices/nvdevice.h | |||
| @@ -31,8 +31,8 @@ public: | |||
| 31 | * @param output A buffer where the output data will be written to. | 31 | * @param output A buffer where the output data will be written to. |
| 32 | * @returns The result code of the ioctl. | 32 | * @returns The result code of the ioctl. |
| 33 | */ | 33 | */ |
| 34 | virtual NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 34 | virtual NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, |
| 35 | IoctlCtrl& ctrl) = 0; | 35 | std::vector<u8>& output) = 0; |
| 36 | 36 | ||
| 37 | /** | 37 | /** |
| 38 | * Handles an ioctl2 request. | 38 | * Handles an ioctl2 request. |
| @@ -43,8 +43,7 @@ public: | |||
| 43 | * @returns The result code of the ioctl. | 43 | * @returns The result code of the ioctl. |
| 44 | */ | 44 | */ |
| 45 | virtual NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, | 45 | virtual NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 46 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 46 | const std::vector<u8>& inline_input, std::vector<u8>& output) = 0; |
| 47 | IoctlCtrl& ctrl) = 0; | ||
| 48 | 47 | ||
| 49 | /** | 48 | /** |
| 50 | * Handles an ioctl3 request. | 49 | * Handles an ioctl3 request. |
| @@ -55,7 +54,7 @@ public: | |||
| 55 | * @returns The result code of the ioctl. | 54 | * @returns The result code of the ioctl. |
| 56 | */ | 55 | */ |
| 57 | virtual NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 56 | virtual NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 58 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) = 0; | 57 | std::vector<u8>& inline_output) = 0; |
| 59 | 58 | ||
| 60 | protected: | 59 | protected: |
| 61 | Core::System& system; | 60 | Core::System& system; |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 170a7c9a0..ce615c758 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | |||
| @@ -18,21 +18,20 @@ nvdisp_disp0::nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_de | |||
| 18 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} | 18 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} |
| 19 | nvdisp_disp0 ::~nvdisp_disp0() = default; | 19 | nvdisp_disp0 ::~nvdisp_disp0() = default; |
| 20 | 20 | ||
| 21 | NvResult nvdisp_disp0::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 21 | NvResult nvdisp_disp0::Ioctl1(Ioctl command, const std::vector<u8>& input, |
| 22 | IoctlCtrl& ctrl) { | 22 | std::vector<u8>& output) { |
| 23 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 23 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 24 | return NvResult::NotImplemented; | 24 | return NvResult::NotImplemented; |
| 25 | } | 25 | } |
| 26 | 26 | ||
| 27 | NvResult nvdisp_disp0::Ioctl2(Ioctl command, const std::vector<u8>& input, | 27 | NvResult nvdisp_disp0::Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 28 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 28 | const std::vector<u8>& inline_input, std::vector<u8>& output) { |
| 29 | IoctlCtrl& ctrl) { | ||
| 30 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 29 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 31 | return NvResult::NotImplemented; | 30 | return NvResult::NotImplemented; |
| 32 | } | 31 | } |
| 33 | 32 | ||
| 34 | NvResult nvdisp_disp0::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 33 | NvResult nvdisp_disp0::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 35 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) { | 34 | std::vector<u8>& inline_output) { |
| 36 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 35 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 37 | return NvResult::NotImplemented; | 36 | return NvResult::NotImplemented; |
| 38 | } | 37 | } |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index eb7575e40..55a33b7e4 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h | |||
| @@ -20,13 +20,11 @@ public: | |||
| 20 | explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); | 20 | explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); |
| 21 | ~nvdisp_disp0() override; | 21 | ~nvdisp_disp0() override; |
| 22 | 22 | ||
| 23 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 23 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; |
| 24 | IoctlCtrl& ctrl) override; | ||
| 25 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, | 24 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 26 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 25 | const std::vector<u8>& inline_input, std::vector<u8>& output) override; |
| 27 | IoctlCtrl& ctrl) override; | ||
| 28 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 26 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 29 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; | 27 | std::vector<u8>& inline_output) override; |
| 30 | 28 | ||
| 31 | /// Performs a screen flip, drawing the buffer pointed to by the handle. | 29 | /// Performs a screen flip, drawing the buffer pointed to by the handle. |
| 32 | void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, | 30 | void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 4e0652c39..6b062e10e 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | |||
| @@ -21,8 +21,8 @@ nvhost_as_gpu::nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_ | |||
| 21 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} | 21 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} |
| 22 | nvhost_as_gpu::~nvhost_as_gpu() = default; | 22 | nvhost_as_gpu::~nvhost_as_gpu() = default; |
| 23 | 23 | ||
| 24 | NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 24 | NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, |
| 25 | IoctlCtrl& ctrl) { | 25 | std::vector<u8>& output) { |
| 26 | switch (command.group) { | 26 | switch (command.group) { |
| 27 | case 'A': | 27 | case 'A': |
| 28 | switch (command.cmd) { | 28 | switch (command.cmd) { |
| @@ -55,14 +55,13 @@ NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std: | |||
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | NvResult nvhost_as_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, | 57 | NvResult nvhost_as_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 58 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 58 | const std::vector<u8>& inline_input, std::vector<u8>& output) { |
| 59 | IoctlCtrl& ctrl) { | ||
| 60 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 59 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 61 | return NvResult::NotImplemented; | 60 | return NvResult::NotImplemented; |
| 62 | } | 61 | } |
| 63 | 62 | ||
| 64 | NvResult nvhost_as_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 63 | NvResult nvhost_as_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 65 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) { | 64 | std::vector<u8>& inline_output) { |
| 66 | switch (command.group) { | 65 | switch (command.group) { |
| 67 | case 'A': | 66 | case 'A': |
| 68 | switch (command.cmd) { | 67 | switch (command.cmd) { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index 2bd355af9..08035fa0e 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | |||
| @@ -30,13 +30,11 @@ public: | |||
| 30 | explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); | 30 | explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); |
| 31 | ~nvhost_as_gpu() override; | 31 | ~nvhost_as_gpu() override; |
| 32 | 32 | ||
| 33 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 33 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; |
| 34 | IoctlCtrl& ctrl) override; | ||
| 35 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, | 34 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 36 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 35 | const std::vector<u8>& inline_input, std::vector<u8>& output) override; |
| 37 | IoctlCtrl& ctrl) override; | ||
| 38 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 36 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 39 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; | 37 | std::vector<u8>& inline_output) override; |
| 40 | 38 | ||
| 41 | private: | 39 | private: |
| 42 | class BufferMap final { | 40 | class BufferMap final { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index 92d31b620..fea3b7b9f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | |||
| @@ -20,8 +20,7 @@ nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface, | |||
| 20 | : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {} | 20 | : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {} |
| 21 | nvhost_ctrl::~nvhost_ctrl() = default; | 21 | nvhost_ctrl::~nvhost_ctrl() = default; |
| 22 | 22 | ||
| 23 | NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 23 | NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { |
| 24 | IoctlCtrl& ctrl) { | ||
| 25 | switch (command.group) { | 24 | switch (command.group) { |
| 26 | case 0x0: | 25 | case 0x0: |
| 27 | switch (command.cmd) { | 26 | switch (command.cmd) { |
| @@ -30,9 +29,9 @@ NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::v | |||
| 30 | case 0x1c: | 29 | case 0x1c: |
| 31 | return IocCtrlClearEventWait(input, output); | 30 | return IocCtrlClearEventWait(input, output); |
| 32 | case 0x1d: | 31 | case 0x1d: |
| 33 | return IocCtrlEventWait(input, output, false, ctrl); | 32 | return IocCtrlEventWait(input, output, false); |
| 34 | case 0x1e: | 33 | case 0x1e: |
| 35 | return IocCtrlEventWait(input, output, true, ctrl); | 34 | return IocCtrlEventWait(input, output, true); |
| 36 | case 0x1f: | 35 | case 0x1f: |
| 37 | return IocCtrlEventRegister(input, output); | 36 | return IocCtrlEventRegister(input, output); |
| 38 | case 0x20: | 37 | case 0x20: |
| @@ -48,14 +47,13 @@ NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::v | |||
| 48 | } | 47 | } |
| 49 | 48 | ||
| 50 | NvResult nvhost_ctrl::Ioctl2(Ioctl command, const std::vector<u8>& input, | 49 | NvResult nvhost_ctrl::Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 51 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 50 | const std::vector<u8>& inline_input, std::vector<u8>& output) { |
| 52 | IoctlCtrl& ctrl) { | ||
| 53 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 51 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 54 | return NvResult::NotImplemented; | 52 | return NvResult::NotImplemented; |
| 55 | } | 53 | } |
| 56 | 54 | ||
| 57 | NvResult nvhost_ctrl::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 55 | NvResult nvhost_ctrl::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 58 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) { | 56 | std::vector<u8>& inline_output) { |
| 59 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 57 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 60 | return NvResult::NotImplemented; | 58 | return NvResult::NotImplemented; |
| 61 | } | 59 | } |
| @@ -69,7 +67,7 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector | |||
| 69 | } | 67 | } |
| 70 | 68 | ||
| 71 | NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, | 69 | NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, |
| 72 | bool is_async, IoctlCtrl& ctrl) { | 70 | bool is_async) { |
| 73 | IocCtrlEventWaitParams params{}; | 71 | IocCtrlEventWaitParams params{}; |
| 74 | std::memcpy(¶ms, input.data(), sizeof(params)); | 72 | std::memcpy(¶ms, input.data(), sizeof(params)); |
| 75 | LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}", | 73 | LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}", |
| @@ -141,12 +139,6 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector | |||
| 141 | params.value |= event_id; | 139 | params.value |= event_id; |
| 142 | event.event.writable->Clear(); | 140 | event.event.writable->Clear(); |
| 143 | gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); | 141 | gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); |
| 144 | if (!is_async && ctrl.fresh_call) { | ||
| 145 | ctrl.must_delay = true; | ||
| 146 | ctrl.timeout = params.timeout; | ||
| 147 | ctrl.event_id = event_id; | ||
| 148 | return NvResult::Timeout; | ||
| 149 | } | ||
| 150 | std::memcpy(output.data(), ¶ms, sizeof(params)); | 142 | std::memcpy(output.data(), ¶ms, sizeof(params)); |
| 151 | return NvResult::Timeout; | 143 | return NvResult::Timeout; |
| 152 | } | 144 | } |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index 107168e21..c5aa1362a 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | |||
| @@ -18,13 +18,11 @@ public: | |||
| 18 | SyncpointManager& syncpoint_manager); | 18 | SyncpointManager& syncpoint_manager); |
| 19 | ~nvhost_ctrl() override; | 19 | ~nvhost_ctrl() override; |
| 20 | 20 | ||
| 21 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 21 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; |
| 22 | IoctlCtrl& ctrl) override; | ||
| 23 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, | 22 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 24 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 23 | const std::vector<u8>& inline_input, std::vector<u8>& output) override; |
| 25 | IoctlCtrl& ctrl) override; | ||
| 26 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 24 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 27 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; | 25 | std::vector<u8>& inline_output) override; |
| 28 | 26 | ||
| 29 | private: | 27 | private: |
| 30 | struct IocSyncptReadParams { | 28 | struct IocSyncptReadParams { |
| @@ -123,8 +121,7 @@ private: | |||
| 123 | static_assert(sizeof(IocCtrlEventKill) == 8, "IocCtrlEventKill is incorrect size"); | 121 | static_assert(sizeof(IocCtrlEventKill) == 8, "IocCtrlEventKill is incorrect size"); |
| 124 | 122 | ||
| 125 | NvResult NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output); | 123 | NvResult NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output); |
| 126 | NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async, | 124 | NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async); |
| 127 | IoctlCtrl& ctrl); | ||
| 128 | NvResult IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output); | 125 | NvResult IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output); |
| 129 | NvResult IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output); | 126 | NvResult IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output); |
| 130 | NvResult IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output); | 127 | NvResult IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output); |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index 647f5907e..0320d3ae2 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp | |||
| @@ -16,7 +16,7 @@ nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system) : nvdevice(system) {} | |||
| 16 | nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default; | 16 | nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default; |
| 17 | 17 | ||
| 18 | NvResult nvhost_ctrl_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, | 18 | NvResult nvhost_ctrl_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, |
| 19 | std::vector<u8>& output, IoctlCtrl& ctrl) { | 19 | std::vector<u8>& output) { |
| 20 | switch (command.group) { | 20 | switch (command.group) { |
| 21 | case 'G': | 21 | case 'G': |
| 22 | switch (command.cmd) { | 22 | switch (command.cmd) { |
| @@ -48,15 +48,13 @@ NvResult nvhost_ctrl_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, | |||
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | NvResult nvhost_ctrl_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, | 50 | NvResult nvhost_ctrl_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 51 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 51 | const std::vector<u8>& inline_input, std::vector<u8>& output) { |
| 52 | IoctlCtrl& ctrl) { | ||
| 53 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 52 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 54 | return NvResult::NotImplemented; | 53 | return NvResult::NotImplemented; |
| 55 | } | 54 | } |
| 56 | 55 | ||
| 57 | NvResult nvhost_ctrl_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, | 56 | NvResult nvhost_ctrl_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, |
| 58 | std::vector<u8>& output, std::vector<u8>& inline_output, | 57 | std::vector<u8>& output, std::vector<u8>& inline_output) { |
| 59 | IoctlCtrl& ctrl) { | ||
| 60 | switch (command.group) { | 58 | switch (command.group) { |
| 61 | case 'G': | 59 | case 'G': |
| 62 | switch (command.cmd) { | 60 | switch (command.cmd) { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h index c2fffe734..137b88238 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h | |||
| @@ -16,13 +16,11 @@ public: | |||
| 16 | explicit nvhost_ctrl_gpu(Core::System& system); | 16 | explicit nvhost_ctrl_gpu(Core::System& system); |
| 17 | ~nvhost_ctrl_gpu() override; | 17 | ~nvhost_ctrl_gpu() override; |
| 18 | 18 | ||
| 19 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 19 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; |
| 20 | IoctlCtrl& ctrl) override; | ||
| 21 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, | 20 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 22 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 21 | const std::vector<u8>& inline_input, std::vector<u8>& output) override; |
| 23 | IoctlCtrl& ctrl) override; | ||
| 24 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 22 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 25 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; | 23 | std::vector<u8>& inline_output) override; |
| 26 | 24 | ||
| 27 | private: | 25 | private: |
| 28 | struct IoctlGpuCharacteristics { | 26 | struct IoctlGpuCharacteristics { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index b0c2caba5..af8b3d9f1 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | |||
| @@ -23,8 +23,7 @@ nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, | |||
| 23 | 23 | ||
| 24 | nvhost_gpu::~nvhost_gpu() = default; | 24 | nvhost_gpu::~nvhost_gpu() = default; |
| 25 | 25 | ||
| 26 | NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 26 | NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { |
| 27 | IoctlCtrl& ctrl) { | ||
| 28 | switch (command.group) { | 27 | switch (command.group) { |
| 29 | case 0x0: | 28 | case 0x0: |
| 30 | switch (command.cmd) { | 29 | switch (command.cmd) { |
| @@ -76,8 +75,7 @@ NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::ve | |||
| 76 | }; | 75 | }; |
| 77 | 76 | ||
| 78 | NvResult nvhost_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, | 77 | NvResult nvhost_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 79 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 78 | const std::vector<u8>& inline_input, std::vector<u8>& output) { |
| 80 | IoctlCtrl& ctrl) { | ||
| 81 | switch (command.group) { | 79 | switch (command.group) { |
| 82 | case 'H': | 80 | case 'H': |
| 83 | switch (command.cmd) { | 81 | switch (command.cmd) { |
| @@ -91,7 +89,7 @@ NvResult nvhost_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, | |||
| 91 | } | 89 | } |
| 92 | 90 | ||
| 93 | NvResult nvhost_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 91 | NvResult nvhost_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 94 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) { | 92 | std::vector<u8>& inline_output) { |
| 95 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 93 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 96 | return NvResult::NotImplemented; | 94 | return NvResult::NotImplemented; |
| 97 | } | 95 | } |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index aa0048a9d..e0298b4fe 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h | |||
| @@ -26,13 +26,11 @@ public: | |||
| 26 | SyncpointManager& syncpoint_manager); | 26 | SyncpointManager& syncpoint_manager); |
| 27 | ~nvhost_gpu() override; | 27 | ~nvhost_gpu() override; |
| 28 | 28 | ||
| 29 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 29 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; |
| 30 | IoctlCtrl& ctrl) override; | ||
| 31 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, | 30 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 32 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 31 | const std::vector<u8>& inline_input, std::vector<u8>& output) override; |
| 33 | IoctlCtrl& ctrl) override; | ||
| 34 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 32 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 35 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; | 33 | std::vector<u8>& inline_output) override; |
| 36 | 34 | ||
| 37 | private: | 35 | private: |
| 38 | enum class CtxObjects : u32_le { | 36 | enum class CtxObjects : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index b8328c314..36970f828 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | |||
| @@ -11,12 +11,13 @@ | |||
| 11 | 11 | ||
| 12 | namespace Service::Nvidia::Devices { | 12 | namespace Service::Nvidia::Devices { |
| 13 | 13 | ||
| 14 | nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) | 14 | nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, |
| 15 | : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} | 15 | SyncpointManager& syncpoint_manager) |
| 16 | : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {} | ||
| 16 | nvhost_nvdec::~nvhost_nvdec() = default; | 17 | nvhost_nvdec::~nvhost_nvdec() = default; |
| 17 | 18 | ||
| 18 | NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 19 | NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, |
| 19 | IoctlCtrl& ctrl) { | 20 | std::vector<u8>& output) { |
| 20 | switch (command.group) { | 21 | switch (command.group) { |
| 21 | case 0x0: | 22 | case 0x0: |
| 22 | switch (command.cmd) { | 23 | switch (command.cmd) { |
| @@ -58,14 +59,13 @@ NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, std:: | |||
| 58 | } | 59 | } |
| 59 | 60 | ||
| 60 | NvResult nvhost_nvdec::Ioctl2(Ioctl command, const std::vector<u8>& input, | 61 | NvResult nvhost_nvdec::Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 61 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 62 | const std::vector<u8>& inline_input, std::vector<u8>& output) { |
| 62 | IoctlCtrl& ctrl) { | ||
| 63 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 63 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 64 | return NvResult::NotImplemented; | 64 | return NvResult::NotImplemented; |
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | NvResult nvhost_nvdec::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 67 | NvResult nvhost_nvdec::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 68 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) { | 68 | std::vector<u8>& inline_output) { |
| 69 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 69 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 70 | return NvResult::NotImplemented; | 70 | return NvResult::NotImplemented; |
| 71 | } | 71 | } |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 884ed6c5b..77ef53cdd 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | |||
| @@ -11,16 +11,15 @@ namespace Service::Nvidia::Devices { | |||
| 11 | 11 | ||
| 12 | class nvhost_nvdec final : public nvhost_nvdec_common { | 12 | class nvhost_nvdec final : public nvhost_nvdec_common { |
| 13 | public: | 13 | public: |
| 14 | explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); | 14 | explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, |
| 15 | SyncpointManager& syncpoint_manager); | ||
| 15 | ~nvhost_nvdec() override; | 16 | ~nvhost_nvdec() override; |
| 16 | 17 | ||
| 17 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 18 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; |
| 18 | IoctlCtrl& ctrl) override; | ||
| 19 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, | 19 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 20 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 20 | const std::vector<u8>& inline_input, std::vector<u8>& output) override; |
| 21 | IoctlCtrl& ctrl) override; | ||
| 22 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 21 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 23 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; | 22 | std::vector<u8>& inline_output) override; |
| 24 | }; | 23 | }; |
| 25 | 24 | ||
| 26 | } // namespace Service::Nvidia::Devices | 25 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index b49cecb42..4898dc27a 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "core/core.h" | 11 | #include "core/core.h" |
| 12 | #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" | 12 | #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" |
| 13 | #include "core/hle/service/nvdrv/devices/nvmap.h" | 13 | #include "core/hle/service/nvdrv/devices/nvmap.h" |
| 14 | #include "core/hle/service/nvdrv/syncpoint_manager.h" | ||
| 14 | #include "core/memory.h" | 15 | #include "core/memory.h" |
| 15 | #include "video_core/memory_manager.h" | 16 | #include "video_core/memory_manager.h" |
| 16 | #include "video_core/renderer_base.h" | 17 | #include "video_core/renderer_base.h" |
| @@ -36,8 +37,9 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s | |||
| 36 | } | 37 | } |
| 37 | } // Anonymous namespace | 38 | } // Anonymous namespace |
| 38 | 39 | ||
| 39 | nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) | 40 | nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, |
| 40 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} | 41 | SyncpointManager& syncpoint_manager) |
| 42 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager(syncpoint_manager) {} | ||
| 41 | nvhost_nvdec_common::~nvhost_nvdec_common() = default; | 43 | nvhost_nvdec_common::~nvhost_nvdec_common() = default; |
| 42 | 44 | ||
| 43 | NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) { | 45 | NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) { |
| @@ -71,10 +73,15 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u | |||
| 71 | offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset); | 73 | offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset); |
| 72 | offset = SpliceVectors(input, fences, params.fence_count, offset); | 74 | offset = SpliceVectors(input, fences, params.fence_count, offset); |
| 73 | 75 | ||
| 74 | // TODO(ameerj): For async gpu, utilize fences for syncpoint 'max' increment | ||
| 75 | |||
| 76 | auto& gpu = system.GPU(); | 76 | auto& gpu = system.GPU(); |
| 77 | 77 | if (gpu.UseNvdec()) { | |
| 78 | for (std::size_t i = 0; i < syncpt_increments.size(); i++) { | ||
| 79 | const SyncptIncr& syncpt_incr = syncpt_increments[i]; | ||
| 80 | fences[i].id = syncpt_incr.id; | ||
| 81 | fences[i].value = | ||
| 82 | syncpoint_manager.IncreaseSyncpoint(syncpt_incr.id, syncpt_incr.increments); | ||
| 83 | } | ||
| 84 | } | ||
| 78 | for (const auto& cmd_buffer : command_buffers) { | 85 | for (const auto& cmd_buffer : command_buffers) { |
| 79 | auto object = nvmap_dev->GetObject(cmd_buffer.memory_id); | 86 | auto object = nvmap_dev->GetObject(cmd_buffer.memory_id); |
| 80 | ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); | 87 | ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); |
| @@ -89,7 +96,13 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u | |||
| 89 | cmdlist.size() * sizeof(u32)); | 96 | cmdlist.size() * sizeof(u32)); |
| 90 | gpu.PushCommandBuffer(cmdlist); | 97 | gpu.PushCommandBuffer(cmdlist); |
| 91 | } | 98 | } |
| 99 | if (gpu.UseNvdec()) { | ||
| 92 | 100 | ||
| 101 | fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1); | ||
| 102 | |||
| 103 | Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}}; | ||
| 104 | gpu.PushCommandBuffer(cmdlist); | ||
| 105 | } | ||
| 93 | std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit)); | 106 | std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit)); |
| 94 | // Some games expect command_buffers to be written back | 107 | // Some games expect command_buffers to be written back |
| 95 | offset = sizeof(IoctlSubmit); | 108 | offset = sizeof(IoctlSubmit); |
| @@ -98,6 +111,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u | |||
| 98 | offset = WriteVectors(output, reloc_shifts, offset); | 111 | offset = WriteVectors(output, reloc_shifts, offset); |
| 99 | offset = WriteVectors(output, syncpt_increments, offset); | 112 | offset = WriteVectors(output, syncpt_increments, offset); |
| 100 | offset = WriteVectors(output, wait_checks, offset); | 113 | offset = WriteVectors(output, wait_checks, offset); |
| 114 | offset = WriteVectors(output, fences, offset); | ||
| 101 | 115 | ||
| 102 | return NvResult::Success; | 116 | return NvResult::Success; |
| 103 | } | 117 | } |
| @@ -107,9 +121,10 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::ve | |||
| 107 | std::memcpy(¶ms, input.data(), sizeof(IoctlGetSyncpoint)); | 121 | std::memcpy(¶ms, input.data(), sizeof(IoctlGetSyncpoint)); |
| 108 | LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); | 122 | LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); |
| 109 | 123 | ||
| 110 | // We found that implementing this causes deadlocks with async gpu, along with degraded | 124 | if (device_syncpoints[params.param] == 0 && system.GPU().UseNvdec()) { |
| 111 | // performance. TODO: RE the nvdec async implementation | 125 | device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint(); |
| 112 | params.value = 0; | 126 | } |
| 127 | params.value = device_syncpoints[params.param]; | ||
| 113 | std::memcpy(output.data(), ¶ms, sizeof(IoctlGetSyncpoint)); | 128 | std::memcpy(output.data(), ¶ms, sizeof(IoctlGetSyncpoint)); |
| 114 | 129 | ||
| 115 | return NvResult::Success; | 130 | return NvResult::Success; |
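The two hunks above give nvdec submissions real syncpoint bookkeeping: GetSyncpoint now lazily allocates a device syncpoint instead of always reporting 0, and Submit raises each requested syncpoint's expected value and writes the resulting fences back for the guest to wait on. A minimal sketch of that bookkeeping, assuming a simplified SyncpointManager; only the AllocateSyncpoint/IncreaseSyncpoint names and the fence-filling loop come from the diff, and the rest is illustrative:

    #include <array>
    #include <cstdint>
    #include <vector>

    constexpr std::size_t MaxSyncPoints = 192; // assumed capacity for the sketch

    struct SyncptIncr {
        std::uint32_t id;
        std::uint32_t increments;
    };

    struct Fence {
        std::uint32_t id;
        std::uint32_t value;
    };

    class SyncpointManager {
    public:
        // Reserve an unused syncpoint id, as GetSyncpoint now does lazily per device.
        std::uint32_t AllocateSyncpoint() {
            return next_id++;
        }

        // Raise the syncpoint's expected value and return it; this is the fence
        // value the guest will eventually wait on.
        std::uint32_t IncreaseSyncpoint(std::uint32_t id, std::uint32_t amount) {
            return expected[id] += amount;
        }

    private:
        std::uint32_t next_id{1};
        std::array<std::uint32_t, MaxSyncPoints> expected{};
    };

    // Mirrors the Submit loop: one written-back fence per requested increment.
    // Assumes fences was already sized to cover the increments, as in the diff.
    void FillFences(SyncpointManager& manager, const std::vector<SyncptIncr>& increments,
                    std::vector<Fence>& fences) {
        for (std::size_t i = 0; i < increments.size(); ++i) {
            fences[i].id = increments[i].id;
            fences[i].value = manager.IncreaseSyncpoint(increments[i].id, increments[i].increments);
        }
    }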
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index d9f95ba58..4c9d4ba41 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h | |||
| @@ -10,12 +10,16 @@ | |||
| 10 | #include "common/swap.h" | 10 | #include "common/swap.h" |
| 11 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 11 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 12 | 12 | ||
| 13 | namespace Service::Nvidia::Devices { | 13 | namespace Service::Nvidia { |
| 14 | class SyncpointManager; | ||
| 15 | |||
| 16 | namespace Devices { | ||
| 14 | class nvmap; | 17 | class nvmap; |
| 15 | 18 | ||
| 16 | class nvhost_nvdec_common : public nvdevice { | 19 | class nvhost_nvdec_common : public nvdevice { |
| 17 | public: | 20 | public: |
| 18 | explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); | 21 | explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, |
| 22 | SyncpointManager& syncpoint_manager); | ||
| 19 | ~nvhost_nvdec_common() override; | 23 | ~nvhost_nvdec_common() override; |
| 20 | 24 | ||
| 21 | protected: | 25 | protected: |
| @@ -157,8 +161,10 @@ protected: | |||
| 157 | s32_le nvmap_fd{}; | 161 | s32_le nvmap_fd{}; |
| 158 | u32_le submit_timeout{}; | 162 | u32_le submit_timeout{}; |
| 159 | std::shared_ptr<nvmap> nvmap_dev; | 163 | std::shared_ptr<nvmap> nvmap_dev; |
| 160 | 164 | SyncpointManager& syncpoint_manager; | |
| 165 | std::array<u32, MaxSyncPoints> device_syncpoints{}; | ||
| 161 | // This is expected to be ordered, therefore we must use a map, not unordered_map | 166 | // This is expected to be ordered, therefore we must use a map, not unordered_map |
| 162 | std::map<GPUVAddr, BufferMap> buffer_mappings; | 167 | std::map<GPUVAddr, BufferMap> buffer_mappings; |
| 163 | }; | 168 | }; |
| 164 | }; // namespace Service::Nvidia::Devices | 169 | }; // namespace Devices |
| 170 | } // namespace Service::Nvidia | ||
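The namespace split above exists so that SyncpointManager, which lives in Service::Nvidia rather than Service::Nvidia::Devices, can be forward-declared without including syncpoint_manager.h. A forward declaration always introduces the name into the namespace where it is written, so placing it inside Devices would declare a different, unrelated class. A small illustration, where nvhost_example is a hypothetical stand-in:

    namespace Service::Nvidia {
    class SyncpointManager; // declares the intended Service::Nvidia::SyncpointManager

    namespace Devices {
    // Written inside Devices, `class SyncpointManager;` would instead declare
    // Service::Nvidia::Devices::SyncpointManager, and the member below would
    // bind to that unrelated type.
    class nvhost_example {
        SyncpointManager* manager{}; // found via enclosing-namespace lookup
    };
    } // namespace Devices
    } // namespace Service::Nvidia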
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp index 6f4ab0ab3..2d06955c0 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp | |||
| @@ -13,8 +13,8 @@ namespace Service::Nvidia::Devices { | |||
| 13 | nvhost_nvjpg::nvhost_nvjpg(Core::System& system) : nvdevice(system) {} | 13 | nvhost_nvjpg::nvhost_nvjpg(Core::System& system) : nvdevice(system) {} |
| 14 | nvhost_nvjpg::~nvhost_nvjpg() = default; | 14 | nvhost_nvjpg::~nvhost_nvjpg() = default; |
| 15 | 15 | ||
| 16 | NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 16 | NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input, |
| 17 | IoctlCtrl& ctrl) { | 17 | std::vector<u8>& output) { |
| 18 | switch (command.group) { | 18 | switch (command.group) { |
| 19 | case 'H': | 19 | case 'H': |
| 20 | switch (command.cmd) { | 20 | switch (command.cmd) { |
| @@ -33,14 +33,13 @@ NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input, std:: | |||
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | NvResult nvhost_nvjpg::Ioctl2(Ioctl command, const std::vector<u8>& input, | 35 | NvResult nvhost_nvjpg::Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 36 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 36 | const std::vector<u8>& inline_input, std::vector<u8>& output) { |
| 37 | IoctlCtrl& ctrl) { | ||
| 38 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 37 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 39 | return NvResult::NotImplemented; | 38 | return NvResult::NotImplemented; |
| 40 | } | 39 | } |
| 41 | 40 | ||
| 42 | NvResult nvhost_nvjpg::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 41 | NvResult nvhost_nvjpg::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 43 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) { | 42 | std::vector<u8>& inline_output) { |
| 44 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 43 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 45 | return NvResult::NotImplemented; | 44 | return NvResult::NotImplemented; |
| 46 | } | 45 | } |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h index 6fb99d959..43948d18d 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h | |||
| @@ -16,13 +16,11 @@ public: | |||
| 16 | explicit nvhost_nvjpg(Core::System& system); | 16 | explicit nvhost_nvjpg(Core::System& system); |
| 17 | ~nvhost_nvjpg() override; | 17 | ~nvhost_nvjpg() override; |
| 18 | 18 | ||
| 19 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 19 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; |
| 20 | IoctlCtrl& ctrl) override; | ||
| 21 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, | 20 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 22 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 21 | const std::vector<u8>& inline_input, std::vector<u8>& output) override; |
| 23 | IoctlCtrl& ctrl) override; | ||
| 24 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 22 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 25 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; | 23 | std::vector<u8>& inline_output) override; |
| 26 | 24 | ||
| 27 | private: | 25 | private: |
| 28 | struct IoctlSetNvmapFD { | 26 | struct IoctlSetNvmapFD { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 55a17f423..72499654c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | |||
| @@ -10,13 +10,13 @@ | |||
| 10 | #include "video_core/renderer_base.h" | 10 | #include "video_core/renderer_base.h" |
| 11 | 11 | ||
| 12 | namespace Service::Nvidia::Devices { | 12 | namespace Service::Nvidia::Devices { |
| 13 | nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) | 13 | nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, |
| 14 | : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} | 14 | SyncpointManager& syncpoint_manager) |
| 15 | : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {} | ||
| 15 | 16 | ||
| 16 | nvhost_vic::~nvhost_vic() = default; | 17 | nvhost_vic::~nvhost_vic() = default; |
| 17 | 18 | ||
| 18 | NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 19 | NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { |
| 19 | IoctlCtrl& ctrl) { | ||
| 20 | switch (command.group) { | 20 | switch (command.group) { |
| 21 | case 0x0: | 21 | case 0x0: |
| 22 | switch (command.cmd) { | 22 | switch (command.cmd) { |
| @@ -51,14 +51,13 @@ NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::ve | |||
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | NvResult nvhost_vic::Ioctl2(Ioctl command, const std::vector<u8>& input, | 53 | NvResult nvhost_vic::Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 54 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 54 | const std::vector<u8>& inline_input, std::vector<u8>& output) { |
| 55 | IoctlCtrl& ctrl) { | ||
| 56 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 55 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 57 | return NvResult::NotImplemented; | 56 | return NvResult::NotImplemented; |
| 58 | } | 57 | } |
| 59 | 58 | ||
| 60 | NvResult nvhost_vic::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 59 | NvResult nvhost_vic::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 61 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) { | 60 | std::vector<u8>& inline_output) { |
| 62 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 61 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 63 | return NvResult::NotImplemented; | 62 | return NvResult::NotImplemented; |
| 64 | } | 63 | } |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index 7f4858cd4..f401c61fa 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h | |||
| @@ -7,19 +7,17 @@ | |||
| 7 | #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" | 7 | #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" |
| 8 | 8 | ||
| 9 | namespace Service::Nvidia::Devices { | 9 | namespace Service::Nvidia::Devices { |
| 10 | class nvmap; | ||
| 11 | 10 | ||
| 12 | class nvhost_vic final : public nvhost_nvdec_common { | 11 | class nvhost_vic final : public nvhost_nvdec_common { |
| 13 | public: | 12 | public: |
| 14 | explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); | 13 | explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, |
| 14 | SyncpointManager& syncpoint_manager); | ||
| 15 | ~nvhost_vic(); | 15 | ~nvhost_vic(); |
| 16 | 16 | ||
| 17 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 17 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; |
| 18 | IoctlCtrl& ctrl) override; | ||
| 19 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, | 18 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 20 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 19 | const std::vector<u8>& inline_input, std::vector<u8>& output) override; |
| 21 | IoctlCtrl& ctrl) override; | ||
| 22 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 20 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 23 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; | 21 | std::vector<u8>& inline_output) override; |
| 24 | }; | 22 | }; |
| 25 | } // namespace Service::Nvidia::Devices | 23 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp index 910cfee51..4015a2740 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.cpp +++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp | |||
| @@ -19,8 +19,7 @@ nvmap::nvmap(Core::System& system) : nvdevice(system) { | |||
| 19 | 19 | ||
| 20 | nvmap::~nvmap() = default; | 20 | nvmap::~nvmap() = default; |
| 21 | 21 | ||
| 22 | NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 22 | NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { |
| 23 | IoctlCtrl& ctrl) { | ||
| 24 | switch (command.group) { | 23 | switch (command.group) { |
| 25 | case 0x1: | 24 | case 0x1: |
| 26 | switch (command.cmd) { | 25 | switch (command.cmd) { |
| @@ -49,14 +48,13 @@ NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector< | |||
| 49 | } | 48 | } |
| 50 | 49 | ||
| 51 | NvResult nvmap::Ioctl2(Ioctl command, const std::vector<u8>& input, | 50 | NvResult nvmap::Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 52 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 51 | const std::vector<u8>& inline_input, std::vector<u8>& output) { |
| 53 | IoctlCtrl& ctrl) { | ||
| 54 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 52 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 55 | return NvResult::NotImplemented; | 53 | return NvResult::NotImplemented; |
| 56 | } | 54 | } |
| 57 | 55 | ||
| 58 | NvResult nvmap::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 56 | NvResult nvmap::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 59 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) { | 57 | std::vector<u8>& inline_output) { |
| 60 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); | 58 | UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); |
| 61 | return NvResult::NotImplemented; | 59 | return NvResult::NotImplemented; |
| 62 | } | 60 | } |
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h index c0c2fa5eb..4484bd79f 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.h +++ b/src/core/hle/service/nvdrv/devices/nvmap.h | |||
| @@ -19,13 +19,11 @@ public: | |||
| 19 | explicit nvmap(Core::System& system); | 19 | explicit nvmap(Core::System& system); |
| 20 | ~nvmap() override; | 20 | ~nvmap() override; |
| 21 | 21 | ||
| 22 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 22 | NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; |
| 23 | IoctlCtrl& ctrl) override; | ||
| 24 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, | 23 | NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, |
| 25 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 24 | const std::vector<u8>& inline_input, std::vector<u8>& output) override; |
| 26 | IoctlCtrl& ctrl) override; | ||
| 27 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, | 25 | NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 28 | std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; | 26 | std::vector<u8>& inline_output) override; |
| 29 | 27 | ||
| 30 | /// Returns the allocated address of an nvmap object given its handle. | 28 | /// Returns the allocated address of an nvmap object given its handle. |
| 31 | VAddr GetObjectAddress(u32 handle) const; | 29 | VAddr GetObjectAddress(u32 handle) const; |
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp index d72c531f6..cc23b001c 100644 --- a/src/core/hle/service/nvdrv/interface.cpp +++ b/src/core/hle/service/nvdrv/interface.cpp | |||
| @@ -61,32 +61,9 @@ void NVDRV::Ioctl1(Kernel::HLERequestContext& ctx) { | |||
| 61 | std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); | 61 | std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); |
| 62 | const auto input_buffer = ctx.ReadBuffer(0); | 62 | const auto input_buffer = ctx.ReadBuffer(0); |
| 63 | 63 | ||
| 64 | IoctlCtrl ctrl{}; | 64 | const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer); |
| 65 | 65 | if (command.is_out != 0) { | |
| 66 | const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer, ctrl); | 66 | ctx.WriteBuffer(output_buffer); |
| 67 | if (ctrl.must_delay) { | ||
| 68 | ctrl.fresh_call = false; | ||
| 69 | ctx.SleepClientThread( | ||
| 70 | "NVServices::DelayedResponse", ctrl.timeout, | ||
| 71 | [=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_, | ||
| 72 | Kernel::ThreadWakeupReason reason) { | ||
| 73 | IoctlCtrl ctrl2{ctrl}; | ||
| 74 | std::vector<u8> tmp_output = output_buffer; | ||
| 75 | const auto nv_result2 = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output, ctrl2); | ||
| 76 | |||
| 77 | if (command.is_out != 0) { | ||
| 78 | ctx.WriteBuffer(tmp_output); | ||
| 79 | } | ||
| 80 | |||
| 81 | IPC::ResponseBuilder rb{ctx_, 3}; | ||
| 82 | rb.Push(RESULT_SUCCESS); | ||
| 83 | rb.PushEnum(nv_result2); | ||
| 84 | }, | ||
| 85 | nvdrv->GetEventWriteable(ctrl.event_id)); | ||
| 86 | } else { | ||
| 87 | if (command.is_out != 0) { | ||
| 88 | ctx.WriteBuffer(output_buffer); | ||
| 89 | } | ||
| 90 | } | 67 | } |
| 91 | 68 | ||
| 92 | IPC::ResponseBuilder rb{ctx, 3}; | 69 | IPC::ResponseBuilder rb{ctx, 3}; |
| @@ -110,36 +87,8 @@ void NVDRV::Ioctl2(Kernel::HLERequestContext& ctx) { | |||
| 110 | const auto input_inlined_buffer = ctx.ReadBuffer(1); | 87 | const auto input_inlined_buffer = ctx.ReadBuffer(1); |
| 111 | std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); | 88 | std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); |
| 112 | 89 | ||
| 113 | IoctlCtrl ctrl{}; | ||
| 114 | |||
| 115 | const auto nv_result = | 90 | const auto nv_result = |
| 116 | nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer, ctrl); | 91 | nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer); |
| 117 | if (ctrl.must_delay) { | ||
| 118 | ctrl.fresh_call = false; | ||
| 119 | ctx.SleepClientThread( | ||
| 120 | "NVServices::DelayedResponse", ctrl.timeout, | ||
| 121 | [=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_, | ||
| 122 | Kernel::ThreadWakeupReason reason) { | ||
| 123 | IoctlCtrl ctrl2{ctrl}; | ||
| 124 | std::vector<u8> tmp_output = output_buffer; | ||
| 125 | const auto nv_result2 = nvdrv->Ioctl2(fd, command, input_buffer, | ||
| 126 | input_inlined_buffer, tmp_output, ctrl2); | ||
| 127 | |||
| 128 | if (command.is_out != 0) { | ||
| 129 | ctx.WriteBuffer(tmp_output); | ||
| 130 | } | ||
| 131 | |||
| 132 | IPC::ResponseBuilder rb{ctx_, 3}; | ||
| 133 | rb.Push(RESULT_SUCCESS); | ||
| 134 | rb.PushEnum(nv_result2); | ||
| 135 | }, | ||
| 136 | nvdrv->GetEventWriteable(ctrl.event_id)); | ||
| 137 | } else { | ||
| 138 | if (command.is_out != 0) { | ||
| 139 | ctx.WriteBuffer(output_buffer); | ||
| 140 | } | ||
| 141 | } | ||
| 142 | |||
| 143 | if (command.is_out != 0) { | 92 | if (command.is_out != 0) { |
| 144 | ctx.WriteBuffer(output_buffer); | 93 | ctx.WriteBuffer(output_buffer); |
| 145 | } | 94 | } |
| @@ -165,36 +114,11 @@ void NVDRV::Ioctl3(Kernel::HLERequestContext& ctx) { | |||
| 165 | std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); | 114 | std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); |
| 166 | std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1)); | 115 | std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1)); |
| 167 | 116 | ||
| 168 | IoctlCtrl ctrl{}; | ||
| 169 | const auto nv_result = | 117 | const auto nv_result = |
| 170 | nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline, ctrl); | 118 | nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline); |
| 171 | if (ctrl.must_delay) { | 119 | if (command.is_out != 0) { |
| 172 | ctrl.fresh_call = false; | 120 | ctx.WriteBuffer(output_buffer, 0); |
| 173 | ctx.SleepClientThread( | 121 | ctx.WriteBuffer(output_buffer_inline, 1); |
| 174 | "NVServices::DelayedResponse", ctrl.timeout, | ||
| 175 | [=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_, | ||
| 176 | Kernel::ThreadWakeupReason reason) { | ||
| 177 | IoctlCtrl ctrl2{ctrl}; | ||
| 178 | std::vector<u8> tmp_output = output_buffer; | ||
| 179 | std::vector<u8> tmp_output2 = output_buffer; | ||
| 180 | const auto nv_result2 = | ||
| 181 | nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output2, ctrl2); | ||
| 182 | |||
| 183 | if (command.is_out != 0) { | ||
| 184 | ctx.WriteBuffer(tmp_output, 0); | ||
| 185 | ctx.WriteBuffer(tmp_output2, 1); | ||
| 186 | } | ||
| 187 | |||
| 188 | IPC::ResponseBuilder rb{ctx_, 3}; | ||
| 189 | rb.Push(RESULT_SUCCESS); | ||
| 190 | rb.PushEnum(nv_result2); | ||
| 191 | }, | ||
| 192 | nvdrv->GetEventWriteable(ctrl.event_id)); | ||
| 193 | } else { | ||
| 194 | if (command.is_out != 0) { | ||
| 195 | ctx.WriteBuffer(output_buffer, 0); | ||
| 196 | ctx.WriteBuffer(output_buffer_inline, 1); | ||
| 197 | } | ||
| 198 | } | 122 | } |
| 199 | 123 | ||
| 200 | IPC::ResponseBuilder rb{ctx, 3}; | 124 | IPC::ResponseBuilder rb{ctx, 3}; |
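With IoctlCtrl gone, all three wrappers above share one synchronous shape: forward the guest buffers to the driver once, write the outputs back when the command's is_out bit is set, and push the result; there is no longer a deferred-response path that re-enters the driver after a sleep. A generic sketch of that shape, where Driver, Context, and PushResult are hypothetical stand-ins for the real nvdrv and IPC types:

    #include <cstdint>
    #include <vector>

    using u8 = std::uint8_t;

    // Stand-in for the Ioctl union in nvdata.h; only the output bit matters here.
    struct Ioctl {
        std::uint32_t is_out;
    };

    template <typename Driver, typename Context>
    void HandleIoctl1(Driver& nvdrv, Context& ctx, int fd, Ioctl command,
                      const std::vector<u8>& input, std::vector<u8>& output) {
        // One synchronous driver call; no IoctlCtrl, no SleepClientThread retry.
        const auto nv_result = nvdrv.Ioctl1(fd, command, input, output);

        // Write the output buffer back only for commands with an output direction.
        if (command.is_out != 0) {
            ctx.WriteBuffer(output);
        }

        // Respond immediately with the driver's result code.
        ctx.PushResult(nv_result);
    }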
diff --git a/src/core/hle/service/nvdrv/nvdata.h b/src/core/hle/service/nvdrv/nvdata.h index a3c4ecd85..3294bc0e7 100644 --- a/src/core/hle/service/nvdrv/nvdata.h +++ b/src/core/hle/service/nvdrv/nvdata.h | |||
| @@ -97,15 +97,4 @@ union Ioctl { | |||
| 97 | BitField<31, 1, u32> is_out; | 97 | BitField<31, 1, u32> is_out; |
| 98 | }; | 98 | }; |
| 99 | 99 | ||
| 100 | struct IoctlCtrl { | ||
| 101 | // First call made to the service, for services that call themselves again after a call. | ||
| 102 | bool fresh_call{true}; | ||
| 103 | // Tells the ioctl wrapper that it must delay the IPC response and put the thread to sleep | ||
| 104 | bool must_delay{}; | ||
| 105 | // Timeout for the delay | ||
| 106 | s64 timeout{}; | ||
| 107 | // NV Event Id | ||
| 108 | s32 event_id{-1}; | ||
| 109 | }; | ||
| 110 | |||
| 111 | } // namespace Service::Nvidia | 100 | } // namespace Service::Nvidia |
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 8e0c9f093..620c18728 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp | |||
| @@ -55,9 +55,11 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} { | |||
| 55 | devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); | 55 | devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); |
| 56 | devices["/dev/nvhost-ctrl"] = | 56 | devices["/dev/nvhost-ctrl"] = |
| 57 | std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager); | 57 | std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager); |
| 58 | devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); | 58 | devices["/dev/nvhost-nvdec"] = |
| 59 | std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev, syncpoint_manager); | ||
| 59 | devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); | 60 | devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); |
| 60 | devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); | 61 | devices["/dev/nvhost-vic"] = |
| 62 | std::make_shared<Devices::nvhost_vic>(system, nvmap_dev, syncpoint_manager); | ||
| 61 | } | 63 | } |
| 62 | 64 | ||
| 63 | Module::~Module() = default; | 65 | Module::~Module() = default; |
| @@ -91,7 +93,7 @@ DeviceFD Module::Open(const std::string& device_name) { | |||
| 91 | } | 93 | } |
| 92 | 94 | ||
| 93 | NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, | 95 | NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, |
| 94 | std::vector<u8>& output, IoctlCtrl& ctrl) { | 96 | std::vector<u8>& output) { |
| 95 | if (fd < 0) { | 97 | if (fd < 0) { |
| 96 | LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); | 98 | LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); |
| 97 | return NvResult::InvalidState; | 99 | return NvResult::InvalidState; |
| @@ -104,12 +106,11 @@ NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input | |||
| 104 | return NvResult::NotImplemented; | 106 | return NvResult::NotImplemented; |
| 105 | } | 107 | } |
| 106 | 108 | ||
| 107 | return itr->second->Ioctl1(command, input, output, ctrl); | 109 | return itr->second->Ioctl1(command, input, output); |
| 108 | } | 110 | } |
| 109 | 111 | ||
| 110 | NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input, | 112 | NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input, |
| 111 | const std::vector<u8>& inline_input, std::vector<u8>& output, | 113 | const std::vector<u8>& inline_input, std::vector<u8>& output) { |
| 112 | IoctlCtrl& ctrl) { | ||
| 113 | if (fd < 0) { | 114 | if (fd < 0) { |
| 114 | LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); | 115 | LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); |
| 115 | return NvResult::InvalidState; | 116 | return NvResult::InvalidState; |
| @@ -122,11 +123,11 @@ NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input | |||
| 122 | return NvResult::NotImplemented; | 123 | return NvResult::NotImplemented; |
| 123 | } | 124 | } |
| 124 | 125 | ||
| 125 | return itr->second->Ioctl2(command, input, inline_input, output, ctrl); | 126 | return itr->second->Ioctl2(command, input, inline_input, output); |
| 126 | } | 127 | } |
| 127 | 128 | ||
| 128 | NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input, | 129 | NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input, |
| 129 | std::vector<u8>& output, std::vector<u8>& inline_output, IoctlCtrl& ctrl) { | 130 | std::vector<u8>& output, std::vector<u8>& inline_output) { |
| 130 | if (fd < 0) { | 131 | if (fd < 0) { |
| 131 | LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); | 132 | LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); |
| 132 | return NvResult::InvalidState; | 133 | return NvResult::InvalidState; |
| @@ -139,7 +140,7 @@ NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input | |||
| 139 | return NvResult::NotImplemented; | 140 | return NvResult::NotImplemented; |
| 140 | } | 141 | } |
| 141 | 142 | ||
| 142 | return itr->second->Ioctl3(command, input, output, inline_output, ctrl); | 143 | return itr->second->Ioctl3(command, input, output, inline_output); |
| 143 | } | 144 | } |
| 144 | 145 | ||
| 145 | NvResult Module::Close(DeviceFD fd) { | 146 | NvResult Module::Close(DeviceFD fd) { |
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index 5985d2179..144e657e5 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h | |||
| @@ -119,13 +119,13 @@ public: | |||
| 119 | 119 | ||
| 120 | /// Sends an ioctl command to the specified file descriptor. | 120 | /// Sends an ioctl command to the specified file descriptor. |
| 121 | NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, | 121 | NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, |
| 122 | std::vector<u8>& output, IoctlCtrl& ctrl); | 122 | std::vector<u8>& output); |
| 123 | 123 | ||
| 124 | NvResult Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input, | 124 | NvResult Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input, |
| 125 | const std::vector<u8>& inline_input, std::vector<u8>& output, IoctlCtrl& ctrl); | 125 | const std::vector<u8>& inline_input, std::vector<u8>& output); |
| 126 | 126 | ||
| 127 | NvResult Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input, | 127 | NvResult Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input, |
| 128 | std::vector<u8>& output, std::vector<u8>& inline_output, IoctlCtrl& ctrl); | 128 | std::vector<u8>& output, std::vector<u8>& inline_output); |
| 129 | 129 | ||
| 130 | /// Closes a device file descriptor and returns operation success. | 130 | /// Closes a device file descriptor and returns operation success. |
| 131 | NvResult Close(DeviceFD fd); | 131 | NvResult Close(DeviceFD fd); |
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index 377f47e8e..5578181a4 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp | |||
| @@ -25,7 +25,12 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) | |||
| 25 | ASSERT(slot < buffer_slots); | 25 | ASSERT(slot < buffer_slots); |
| 26 | LOG_WARNING(Service, "Adding graphics buffer {}", slot); | 26 | LOG_WARNING(Service, "Adding graphics buffer {}", slot); |
| 27 | 27 | ||
| 28 | free_buffers.push_back(slot); | 28 | { |
| 29 | std::unique_lock lock{free_buffers_mutex}; | ||
| 30 | free_buffers.push_back(slot); | ||
| 31 | } | ||
| 32 | free_buffers_condition.notify_one(); | ||
| 33 | |||
| 29 | buffers[slot] = { | 34 | buffers[slot] = { |
| 30 | .slot = slot, | 35 | .slot = slot, |
| 31 | .status = Buffer::Status::Free, | 36 | .status = Buffer::Status::Free, |
| @@ -41,10 +46,20 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) | |||
| 41 | 46 | ||
| 42 | std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width, | 47 | std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width, |
| 43 | u32 height) { | 48 | u32 height) { |
| 49 | // Wait for first request before trying to dequeue | ||
| 50 | { | ||
| 51 | std::unique_lock lock{free_buffers_mutex}; | ||
| 52 | free_buffers_condition.wait(lock, [this] { return !free_buffers.empty() || !is_connect; }); | ||
| 53 | } | ||
| 44 | 54 | ||
| 45 | if (free_buffers.empty()) { | 55 | if (!is_connect) { |
| 56 | // Buffer was disconnected while the thread was blocked, this is most likely due to | ||
| 57 | // emulation being stopped | ||
| 46 | return std::nullopt; | 58 | return std::nullopt; |
| 47 | } | 59 | } |
| 60 | |||
| 61 | std::unique_lock lock{free_buffers_mutex}; | ||
| 62 | |||
| 48 | auto f_itr = free_buffers.begin(); | 63 | auto f_itr = free_buffers.begin(); |
| 49 | auto slot = buffers.size(); | 64 | auto slot = buffers.size(); |
| 50 | 65 | ||
| @@ -85,6 +100,7 @@ void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, | |||
| 85 | buffers[slot].crop_rect = crop_rect; | 100 | buffers[slot].crop_rect = crop_rect; |
| 86 | buffers[slot].swap_interval = swap_interval; | 101 | buffers[slot].swap_interval = swap_interval; |
| 87 | buffers[slot].multi_fence = multi_fence; | 102 | buffers[slot].multi_fence = multi_fence; |
| 103 | std::unique_lock lock{queue_sequence_mutex}; | ||
| 88 | queue_sequence.push_back(slot); | 104 | queue_sequence.push_back(slot); |
| 89 | } | 105 | } |
| 90 | 106 | ||
| @@ -97,12 +113,17 @@ void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& mult | |||
| 97 | buffers[slot].multi_fence = multi_fence; | 113 | buffers[slot].multi_fence = multi_fence; |
| 98 | buffers[slot].swap_interval = 0; | 114 | buffers[slot].swap_interval = 0; |
| 99 | 115 | ||
| 100 | free_buffers.push_back(slot); | 116 | { |
| 117 | std::unique_lock lock{free_buffers_mutex}; | ||
| 118 | free_buffers.push_back(slot); | ||
| 119 | } | ||
| 120 | free_buffers_condition.notify_one(); | ||
| 101 | 121 | ||
| 102 | buffer_wait_event.writable->Signal(); | 122 | buffer_wait_event.writable->Signal(); |
| 103 | } | 123 | } |
| 104 | 124 | ||
| 105 | std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { | 125 | std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { |
| 126 | std::unique_lock lock{queue_sequence_mutex}; | ||
| 106 | std::size_t buffer_slot = buffers.size(); | 127 | std::size_t buffer_slot = buffers.size(); |
| 107 | // Iterate to find a queued buffer matching the requested slot. | 128 | // Iterate to find a queued buffer matching the requested slot. |
| 108 | while (buffer_slot == buffers.size() && !queue_sequence.empty()) { | 129 | while (buffer_slot == buffers.size() && !queue_sequence.empty()) { |
| @@ -127,15 +148,30 @@ void BufferQueue::ReleaseBuffer(u32 slot) { | |||
| 127 | ASSERT(buffers[slot].slot == slot); | 148 | ASSERT(buffers[slot].slot == slot); |
| 128 | 149 | ||
| 129 | buffers[slot].status = Buffer::Status::Free; | 150 | buffers[slot].status = Buffer::Status::Free; |
| 130 | free_buffers.push_back(slot); | 151 | { |
| 152 | std::unique_lock lock{free_buffers_mutex}; | ||
| 153 | free_buffers.push_back(slot); | ||
| 154 | } | ||
| 155 | free_buffers_condition.notify_one(); | ||
| 131 | 156 | ||
| 132 | buffer_wait_event.writable->Signal(); | 157 | buffer_wait_event.writable->Signal(); |
| 133 | } | 158 | } |
| 134 | 159 | ||
| 160 | void BufferQueue::Connect() { | ||
| 161 | std::unique_lock lock{queue_sequence_mutex}; | ||
| 162 | queue_sequence.clear(); | ||
| 163 | is_connect = true; | ||
| 164 | } | ||
| 165 | |||
| 135 | void BufferQueue::Disconnect() { | 166 | void BufferQueue::Disconnect() { |
| 136 | buffers.fill({}); | 167 | buffers.fill({}); |
| 137 | queue_sequence.clear(); | 168 | { |
| 169 | std::unique_lock lock{queue_sequence_mutex}; | ||
| 170 | queue_sequence.clear(); | ||
| 171 | } | ||
| 138 | buffer_wait_event.writable->Signal(); | 172 | buffer_wait_event.writable->Signal(); |
| 173 | is_connect = false; | ||
| 174 | free_buffers_condition.notify_one(); | ||
| 139 | } | 175 | } |
| 140 | 176 | ||
| 141 | u32 BufferQueue::Query(QueryType type) { | 177 | u32 BufferQueue::Query(QueryType type) { |
| @@ -144,9 +180,11 @@ u32 BufferQueue::Query(QueryType type) { | |||
| 144 | switch (type) { | 180 | switch (type) { |
| 145 | case QueryType::NativeWindowFormat: | 181 | case QueryType::NativeWindowFormat: |
| 146 | return static_cast<u32>(PixelFormat::RGBA8888); | 182 | return static_cast<u32>(PixelFormat::RGBA8888); |
| 183 | case QueryType::NativeWindowWidth: | ||
| 184 | case QueryType::NativeWindowHeight: | ||
| 185 | break; | ||
| 147 | } | 186 | } |
| 148 | 187 | UNIMPLEMENTED_MSG("Unimplemented query type={}", type); | |
| 149 | UNIMPLEMENTED(); | ||
| 150 | return 0; | 188 | return 0; |
| 151 | } | 189 | } |
| 152 | 190 | ||
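The buffer_queue.cpp changes above are a classic condition-variable handoff: producers (SetPreallocatedBuffer, CancelBuffer, ReleaseBuffer) push a freed slot under free_buffers_mutex and notify, while the consumer (DequeueBuffer) blocks until a slot appears or the queue disconnects. A self-contained sketch of the same pattern with placeholder types follows; unlike the diff, it pops under the same lock it waited on, so the predicate cannot be invalidated between the wake-up and the take:

    #include <condition_variable>
    #include <list>
    #include <mutex>
    #include <optional>

    class FreeList {
    public:
        // Producer side (ReleaseBuffer/CancelBuffer/SetPreallocatedBuffer in the
        // diff): publish a freed slot, then wake one waiter.
        void Push(unsigned slot) {
            {
                std::unique_lock lock{mutex};
                slots.push_back(slot);
            }
            condition.notify_one();
        }

        // Consumer side (DequeueBuffer in the diff): block until a slot exists or
        // the queue disconnects; std::nullopt signals disconnection.
        std::optional<unsigned> Pop() {
            std::unique_lock lock{mutex};
            condition.wait(lock, [this] { return !slots.empty() || !connected; });
            if (!connected) {
                return std::nullopt;
            }
            const unsigned slot = slots.front();
            slots.pop_front();
            return slot;
        }

        // Disconnecting wakes every blocked consumer so it can bail out.
        void Disconnect() {
            {
                std::unique_lock lock{mutex};
                connected = false;
            }
            condition.notify_all();
        }

    private:
        std::mutex mutex;
        std::condition_variable condition;
        std::list<unsigned> slots;
        bool connected{true};
    };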
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index e610923cb..ad7469277 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h | |||
| @@ -4,7 +4,9 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <condition_variable> | ||
| 7 | #include <list> | 8 | #include <list> |
| 9 | #include <mutex> | ||
| 8 | #include <optional> | 10 | #include <optional> |
| 9 | #include <vector> | 11 | #include <vector> |
| 10 | 12 | ||
| @@ -99,6 +101,7 @@ public: | |||
| 99 | void CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& multi_fence); | 101 | void CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& multi_fence); |
| 100 | std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); | 102 | std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); |
| 101 | void ReleaseBuffer(u32 slot); | 103 | void ReleaseBuffer(u32 slot); |
| 104 | void Connect(); | ||
| 102 | void Disconnect(); | 105 | void Disconnect(); |
| 103 | u32 Query(QueryType type); | 106 | u32 Query(QueryType type); |
| 104 | 107 | ||
| @@ -106,18 +109,30 @@ public: | |||
| 106 | return id; | 109 | return id; |
| 107 | } | 110 | } |
| 108 | 111 | ||
| 112 | bool IsConnected() const { | ||
| 113 | return is_connect; | ||
| 114 | } | ||
| 115 | |||
| 109 | std::shared_ptr<Kernel::WritableEvent> GetWritableBufferWaitEvent() const; | 116 | std::shared_ptr<Kernel::WritableEvent> GetWritableBufferWaitEvent() const; |
| 110 | 117 | ||
| 111 | std::shared_ptr<Kernel::ReadableEvent> GetBufferWaitEvent() const; | 118 | std::shared_ptr<Kernel::ReadableEvent> GetBufferWaitEvent() const; |
| 112 | 119 | ||
| 113 | private: | 120 | private: |
| 114 | u32 id; | 121 | BufferQueue(const BufferQueue&) = delete; |
| 115 | u64 layer_id; | 122 | |
| 123 | u32 id{}; | ||
| 124 | u64 layer_id{}; | ||
| 125 | std::atomic_bool is_connect{}; | ||
| 116 | 126 | ||
| 117 | std::list<u32> free_buffers; | 127 | std::list<u32> free_buffers; |
| 118 | std::array<Buffer, buffer_slots> buffers; | 128 | std::array<Buffer, buffer_slots> buffers; |
| 119 | std::list<u32> queue_sequence; | 129 | std::list<u32> queue_sequence; |
| 120 | Kernel::EventPair buffer_wait_event; | 130 | Kernel::EventPair buffer_wait_event; |
| 131 | |||
| 132 | std::mutex free_buffers_mutex; | ||
| 133 | std::condition_variable free_buffers_condition; | ||
| 134 | |||
| 135 | std::mutex queue_sequence_mutex; | ||
| 121 | }; | 136 | }; |
| 122 | 137 | ||
| 123 | } // namespace Service::NVFlinger | 138 | } // namespace Service::NVFlinger |
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 44aa2bdae..4b3581949 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp | |||
| @@ -88,6 +88,10 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) { | |||
| 88 | } | 88 | } |
| 89 | 89 | ||
| 90 | NVFlinger::~NVFlinger() { | 90 | NVFlinger::~NVFlinger() { |
| 91 | for (auto& buffer_queue : buffer_queues) { | ||
| 92 | buffer_queue->Disconnect(); | ||
| 93 | } | ||
| 94 | |||
| 91 | if (system.IsMulticore()) { | 95 | if (system.IsMulticore()) { |
| 92 | is_running = false; | 96 | is_running = false; |
| 93 | wait_event->Set(); | 97 | wait_event->Set(); |
| @@ -104,6 +108,8 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { | |||
| 104 | } | 108 | } |
| 105 | 109 | ||
| 106 | std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) { | 110 | std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) { |
| 111 | const auto guard = Lock(); | ||
| 112 | |||
| 107 | LOG_DEBUG(Service, "Opening \"{}\" display", name); | 113 | LOG_DEBUG(Service, "Opening \"{}\" display", name); |
| 108 | 114 | ||
| 109 | // TODO(Subv): Currently we only support the Default display. | 115 | // TODO(Subv): Currently we only support the Default display. |
| @@ -121,6 +127,7 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) { | |||
| 121 | } | 127 | } |
| 122 | 128 | ||
| 123 | std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { | 129 | std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { |
| 130 | const auto guard = Lock(); | ||
| 124 | auto* const display = FindDisplay(display_id); | 131 | auto* const display = FindDisplay(display_id); |
| 125 | 132 | ||
| 126 | if (display == nullptr) { | 133 | if (display == nullptr) { |
| @@ -129,18 +136,22 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { | |||
| 129 | 136 | ||
| 130 | const u64 layer_id = next_layer_id++; | 137 | const u64 layer_id = next_layer_id++; |
| 131 | const u32 buffer_queue_id = next_buffer_queue_id++; | 138 | const u32 buffer_queue_id = next_buffer_queue_id++; |
| 132 | buffer_queues.emplace_back(system.Kernel(), buffer_queue_id, layer_id); | 139 | buffer_queues.emplace_back( |
| 133 | display->CreateLayer(layer_id, buffer_queues.back()); | 140 | std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id)); |
| 141 | display->CreateLayer(layer_id, *buffer_queues.back()); | ||
| 134 | return layer_id; | 142 | return layer_id; |
| 135 | } | 143 | } |
| 136 | 144 | ||
| 137 | void NVFlinger::CloseLayer(u64 layer_id) { | 145 | void NVFlinger::CloseLayer(u64 layer_id) { |
| 146 | const auto guard = Lock(); | ||
| 147 | |||
| 138 | for (auto& display : displays) { | 148 | for (auto& display : displays) { |
| 139 | display.CloseLayer(layer_id); | 149 | display.CloseLayer(layer_id); |
| 140 | } | 150 | } |
| 141 | } | 151 | } |
| 142 | 152 | ||
| 143 | std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const { | 153 | std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const { |
| 154 | const auto guard = Lock(); | ||
| 144 | const auto* const layer = FindLayer(display_id, layer_id); | 155 | const auto* const layer = FindLayer(display_id, layer_id); |
| 145 | 156 | ||
| 146 | if (layer == nullptr) { | 157 | if (layer == nullptr) { |
| @@ -151,6 +162,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co | |||
| 151 | } | 162 | } |
| 152 | 163 | ||
| 153 | std::shared_ptr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const { | 164 | std::shared_ptr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const { |
| 165 | const auto guard = Lock(); | ||
| 154 | auto* const display = FindDisplay(display_id); | 166 | auto* const display = FindDisplay(display_id); |
| 155 | 167 | ||
| 156 | if (display == nullptr) { | 168 | if (display == nullptr) { |
| @@ -160,20 +172,16 @@ std::shared_ptr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) | |||
| 160 | return display->GetVSyncEvent(); | 172 | return display->GetVSyncEvent(); |
| 161 | } | 173 | } |
| 162 | 174 | ||
| 163 | BufferQueue& NVFlinger::FindBufferQueue(u32 id) { | 175 | BufferQueue* NVFlinger::FindBufferQueue(u32 id) { |
| 176 | const auto guard = Lock(); | ||
| 164 | const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), | 177 | const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), |
| 165 | [id](const auto& queue) { return queue.GetId() == id; }); | 178 | [id](const auto& queue) { return queue->GetId() == id; }); |
| 166 | 179 | ||
| 167 | ASSERT(itr != buffer_queues.end()); | 180 | if (itr == buffer_queues.end()) { |
| 168 | return *itr; | 181 | return nullptr; |
| 169 | } | 182 | } |
| 170 | |||
| 171 | const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const { | ||
| 172 | const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), | ||
| 173 | [id](const auto& queue) { return queue.GetId() == id; }); | ||
| 174 | 183 | ||
| 175 | ASSERT(itr != buffer_queues.end()); | 184 | return itr->get(); |
| 176 | return *itr; | ||
| 177 | } | 185 | } |
| 178 | 186 | ||
| 179 | VI::Display* NVFlinger::FindDisplay(u64 display_id) { | 187 | VI::Display* NVFlinger::FindDisplay(u64 display_id) { |
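The nvflinger.cpp hunks above replace the asserting FindBufferQueue reference lookup (and its const twin) with a single pointer-returning version, so a stale or unknown id becomes a condition the caller can test instead of a crash. A minimal sketch of that lookup pattern, using hypothetical Registry and Item names rather than the real yuzu types:

#include <algorithm>
#include <cstdint>
#include <memory>
#include <vector>

struct Item {
    explicit Item(std::uint32_t id_) : id{id_} {}
    std::uint32_t GetId() const { return id; }
    std::uint32_t id;
};

class Registry {
public:
    void Add(std::uint32_t id) {
        items.emplace_back(std::make_unique<Item>(id));
    }

    // Returns nullptr when no item matches, letting the caller decide how to react.
    Item* Find(std::uint32_t id) {
        const auto itr = std::find_if(items.begin(), items.end(),
                                      [id](const auto& item) { return item->GetId() == id; });
        return itr == items.end() ? nullptr : itr->get();
    }

private:
    std::vector<std::unique_ptr<Item>> items;
};

A caller can then test the pointer before use, or deliberately dereference when the id is known to be valid.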
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 1ebe949c0..c6765259f 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h | |||
| @@ -75,10 +75,7 @@ public: | |||
| 75 | [[nodiscard]] std::shared_ptr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const; | 75 | [[nodiscard]] std::shared_ptr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const; |
| 76 | 76 | ||
| 77 | /// Obtains a buffer queue identified by the ID. | 77 | /// Obtains a buffer queue identified by the ID. |
| 78 | [[nodiscard]] BufferQueue& FindBufferQueue(u32 id); | 78 | [[nodiscard]] BufferQueue* FindBufferQueue(u32 id); |
| 79 | |||
| 80 | /// Obtains a buffer queue identified by the ID. | ||
| 81 | [[nodiscard]] const BufferQueue& FindBufferQueue(u32 id) const; | ||
| 82 | 79 | ||
| 83 | /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when | 80 | /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when |
| 84 | /// finished. | 81 | /// finished. |
| @@ -86,11 +83,11 @@ public: | |||
| 86 | 83 | ||
| 87 | [[nodiscard]] s64 GetNextTicks() const; | 84 | [[nodiscard]] s64 GetNextTicks() const; |
| 88 | 85 | ||
| 86 | private: | ||
| 89 | [[nodiscard]] std::unique_lock<std::mutex> Lock() const { | 87 | [[nodiscard]] std::unique_lock<std::mutex> Lock() const { |
| 90 | return std::unique_lock{*guard}; | 88 | return std::unique_lock{*guard}; |
| 91 | } | 89 | } |
| 92 | 90 | ||
| 93 | private: | ||
| 94 | /// Finds the display identified by the specified ID. | 91 | /// Finds the display identified by the specified ID. |
| 95 | [[nodiscard]] VI::Display* FindDisplay(u64 display_id); | 92 | [[nodiscard]] VI::Display* FindDisplay(u64 display_id); |
| 96 | 93 | ||
| @@ -110,7 +107,7 @@ private: | |||
| 110 | std::shared_ptr<Nvidia::Module> nvdrv; | 107 | std::shared_ptr<Nvidia::Module> nvdrv; |
| 111 | 108 | ||
| 112 | std::vector<VI::Display> displays; | 109 | std::vector<VI::Display> displays; |
| 113 | std::vector<BufferQueue> buffer_queues; | 110 | std::vector<std::unique_ptr<BufferQueue>> buffer_queues; |
| 114 | 111 | ||
| 115 | /// Id to use for the next layer that is created, this counter is shared among all displays. | 112 | /// Id to use for the next layer that is created, this counter is shared among all displays. |
| 116 | u64 next_layer_id = 1; | 113 | u64 next_layer_id = 1; |
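Holding each BufferQueue behind a std::unique_ptr means the queue objects no longer move when the vector grows, so the reference handed to Display::CreateLayer in the .cpp hunk above stays valid across later CreateLayer calls. A small self-contained illustration of that stability property, under assumed simplified types:

#include <memory>
#include <vector>

struct Queue {
    int id;
};

int main() {
    // With std::vector<Queue>, push_back may reallocate and move the elements,
    // invalidating any stored Queue* or Queue&. The extra indirection below
    // keeps every Queue at a fixed address for its whole lifetime.
    std::vector<std::unique_ptr<Queue>> queues;
    queues.emplace_back(std::make_unique<Queue>(Queue{1}));
    Queue* stable = queues.back().get();
    for (int i = 2; i <= 100; ++i) {
        queues.emplace_back(std::make_unique<Queue>(Queue{i})); // may reallocate the vector
    }
    return stable->id; // still safe: returns 1
}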
diff --git a/src/core/hle/service/pcie/pcie.cpp b/src/core/hle/service/pcie/pcie.cpp index 80c0fc7ac..f6686fc4d 100644 --- a/src/core/hle/service/pcie/pcie.cpp +++ b/src/core/hle/service/pcie/pcie.cpp | |||
| @@ -48,7 +48,7 @@ public: | |||
| 48 | 48 | ||
| 49 | class PCIe final : public ServiceFramework<PCIe> { | 49 | class PCIe final : public ServiceFramework<PCIe> { |
| 50 | public: | 50 | public: |
| 51 | explicit PCIe(Core::System& system_) : ServiceFramework{system, "pcie"} { | 51 | explicit PCIe(Core::System& system_) : ServiceFramework{system_, "pcie"} { |
| 52 | // clang-format off | 52 | // clang-format off |
| 53 | static const FunctionInfo functions[] = { | 53 | static const FunctionInfo functions[] = { |
| 54 | {0, nullptr, "RegisterClassDriver"}, | 54 | {0, nullptr, "RegisterClassDriver"}, |
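The one-character pcie.cpp fix is worth dwelling on: in the member-initializer list, the misspelled name system did not fail to compile, it resolved to the not-yet-initialized base-class member, so the base constructor was fed an indeterminate reference. A contrived sketch of the same shadowing trap, with hypothetical Base/Derived types:

struct Base {
    explicit Base(int& ref_) : ref{ref_} {}
    int& ref;
};

struct Derived : Base {
    // Wrong: in the mem-initializer list, plain 'ref' names the still
    // uninitialized member Base::ref, not a parameter, so the base would be
    // seeded with garbage (UB, and typically a -Wuninitialized diagnostic):
    //   explicit Derived(int& ref_) : Base{ref} {}
    explicit Derived(int& ref_) : Base{ref_} {} // Right: pass the parameter.
};

The trailing-underscore parameter convention exists precisely to keep parameter and member names distinct without shadowing; it only works when the underscore is actually typed.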
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index abf3d1ea3..ff2a5b1db 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp | |||
| @@ -95,9 +95,14 @@ ServiceFrameworkBase::ServiceFrameworkBase(Core::System& system_, const char* se | |||
| 95 | : system{system_}, service_name{service_name_}, max_sessions{max_sessions_}, | 95 | : system{system_}, service_name{service_name_}, max_sessions{max_sessions_}, |
| 96 | handler_invoker{handler_invoker_} {} | 96 | handler_invoker{handler_invoker_} {} |
| 97 | 97 | ||
| 98 | ServiceFrameworkBase::~ServiceFrameworkBase() = default; | 98 | ServiceFrameworkBase::~ServiceFrameworkBase() { |
| 99 | // Wait for other threads to release access before destroying | ||
| 100 | const auto guard = LockService(); | ||
| 101 | } | ||
| 99 | 102 | ||
| 100 | void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager) { | 103 | void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager) { |
| 104 | const auto guard = LockService(); | ||
| 105 | |||
| 101 | ASSERT(!port_installed); | 106 | ASSERT(!port_installed); |
| 102 | 107 | ||
| 103 | auto port = service_manager.RegisterService(service_name, max_sessions).Unwrap(); | 108 | auto port = service_manager.RegisterService(service_name, max_sessions).Unwrap(); |
| @@ -106,6 +111,8 @@ void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager) | |||
| 106 | } | 111 | } |
| 107 | 112 | ||
| 108 | void ServiceFrameworkBase::InstallAsNamedPort(Kernel::KernelCore& kernel) { | 113 | void ServiceFrameworkBase::InstallAsNamedPort(Kernel::KernelCore& kernel) { |
| 114 | const auto guard = LockService(); | ||
| 115 | |||
| 109 | ASSERT(!port_installed); | 116 | ASSERT(!port_installed); |
| 110 | 117 | ||
| 111 | auto [server_port, client_port] = | 118 | auto [server_port, client_port] = |
| @@ -115,17 +122,6 @@ void ServiceFrameworkBase::InstallAsNamedPort(Kernel::KernelCore& kernel) { | |||
| 115 | port_installed = true; | 122 | port_installed = true; |
| 116 | } | 123 | } |
| 117 | 124 | ||
| 118 | std::shared_ptr<Kernel::ClientPort> ServiceFrameworkBase::CreatePort(Kernel::KernelCore& kernel) { | ||
| 119 | ASSERT(!port_installed); | ||
| 120 | |||
| 121 | auto [server_port, client_port] = | ||
| 122 | Kernel::ServerPort::CreatePortPair(kernel, max_sessions, service_name); | ||
| 123 | auto port = MakeResult(std::move(server_port)).Unwrap(); | ||
| 124 | port->SetHleHandler(shared_from_this()); | ||
| 125 | port_installed = true; | ||
| 126 | return client_port; | ||
| 127 | } | ||
| 128 | |||
| 129 | void ServiceFrameworkBase::RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n) { | 125 | void ServiceFrameworkBase::RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n) { |
| 130 | handlers.reserve(handlers.size() + n); | 126 | handlers.reserve(handlers.size() + n); |
| 131 | for (std::size_t i = 0; i < n; ++i) { | 127 | for (std::size_t i = 0; i < n; ++i) { |
| @@ -164,6 +160,8 @@ void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) { | |||
| 164 | } | 160 | } |
| 165 | 161 | ||
| 166 | ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& context) { | 162 | ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& context) { |
| 163 | const auto guard = LockService(); | ||
| 164 | |||
| 167 | switch (context.GetCommandType()) { | 165 | switch (context.GetCommandType()) { |
| 168 | case IPC::CommandType::Close: { | 166 | case IPC::CommandType::Close: { |
| 169 | IPC::ResponseBuilder rb{context, 2}; | 167 | IPC::ResponseBuilder rb{context, 2}; |
| @@ -184,7 +182,11 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co | |||
| 184 | UNIMPLEMENTED_MSG("command_type={}", context.GetCommandType()); | 182 | UNIMPLEMENTED_MSG("command_type={}", context.GetCommandType()); |
| 185 | } | 183 | } |
| 186 | 184 | ||
| 187 | context.WriteToOutgoingCommandBuffer(context.GetThread()); | 185 | // If emulation has been shut down, service threads are being closed; do not write the response |
| 186 | // back to memory that may be shutting down as well. | ||
| 187 | if (system.IsPoweredOn()) { | ||
| 188 | context.WriteToOutgoingCommandBuffer(context.GetThread()); | ||
| 189 | } | ||
| 188 | 190 | ||
| 189 | return RESULT_SUCCESS; | 191 | return RESULT_SUCCESS; |
| 190 | } | 192 | } |
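Taken together, the service.cpp hunks give every externally reachable entry point the same prologue (acquire the per-service lock), make the destructor wait on that lock before tearing the object down, and skip the response write-back once the system is no longer powered on. The overall shape, sketched with a plain std::mutex and placeholder methods:

#include <mutex>

class Service {
public:
    void HandleRequest(bool system_powered_on) {
        const std::scoped_lock guard{lock}; // serialize with all other entry points
        // ... dispatch to the request handler ...
        if (system_powered_on) {
            WriteResponse(); // only touch emulated memory while it is still alive
        }
    }

    ~Service() {
        // Block destruction until any in-flight request has released the lock.
        const std::scoped_lock guard{lock};
    }

private:
    void WriteResponse() {}
    std::mutex lock;
};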
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h index 62a182310..916445517 100644 --- a/src/core/hle/service/service.h +++ b/src/core/hle/service/service.h | |||
| @@ -5,9 +5,11 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <mutex> | ||
| 8 | #include <string> | 9 | #include <string> |
| 9 | #include <boost/container/flat_map.hpp> | 10 | #include <boost/container/flat_map.hpp> |
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/spin_lock.h" | ||
| 11 | #include "core/hle/kernel/hle_ipc.h" | 13 | #include "core/hle/kernel/hle_ipc.h" |
| 12 | #include "core/hle/kernel/object.h" | 14 | #include "core/hle/kernel/object.h" |
| 13 | 15 | ||
| @@ -68,11 +70,9 @@ public: | |||
| 68 | void InstallAsService(SM::ServiceManager& service_manager); | 70 | void InstallAsService(SM::ServiceManager& service_manager); |
| 69 | /// Creates a port pair and registers it on the kernel's global port registry. | 71 | /// Creates a port pair and registers it on the kernel's global port registry. |
| 70 | void InstallAsNamedPort(Kernel::KernelCore& kernel); | 72 | void InstallAsNamedPort(Kernel::KernelCore& kernel); |
| 71 | /// Creates and returns an unregistered port for the service. | 73 | /// Invokes a service request routine. |
| 72 | std::shared_ptr<Kernel::ClientPort> CreatePort(Kernel::KernelCore& kernel); | ||
| 73 | |||
| 74 | void InvokeRequest(Kernel::HLERequestContext& ctx); | 74 | void InvokeRequest(Kernel::HLERequestContext& ctx); |
| 75 | 75 | /// Handles a synchronization request for the service. | |
| 76 | ResultCode HandleSyncRequest(Kernel::HLERequestContext& context) override; | 76 | ResultCode HandleSyncRequest(Kernel::HLERequestContext& context) override; |
| 77 | 77 | ||
| 78 | protected: | 78 | protected: |
| @@ -80,6 +80,11 @@ protected: | |||
| 80 | template <typename Self> | 80 | template <typename Self> |
| 81 | using HandlerFnP = void (Self::*)(Kernel::HLERequestContext&); | 81 | using HandlerFnP = void (Self::*)(Kernel::HLERequestContext&); |
| 82 | 82 | ||
| 83 | /// Used to gain exclusive access to the service members, e.g. from CoreTiming thread. | ||
| 84 | [[nodiscard]] std::scoped_lock<Common::SpinLock> LockService() { | ||
| 85 | return std::scoped_lock{lock_service}; | ||
| 86 | } | ||
| 87 | |||
| 83 | /// System context that the service operates under. | 88 | /// System context that the service operates under. |
| 84 | Core::System& system; | 89 | Core::System& system; |
| 85 | 90 | ||
| @@ -115,6 +120,9 @@ private: | |||
| 115 | /// Function used to safely up-cast pointers to the derived class before invoking a handler. | 120 | /// Function used to safely up-cast pointers to the derived class before invoking a handler. |
| 116 | InvokerFn* handler_invoker; | 121 | InvokerFn* handler_invoker; |
| 117 | boost::container::flat_map<u32, FunctionInfoBase> handlers; | 122 | boost::container::flat_map<u32, FunctionInfoBase> handlers; |
| 123 | |||
| 124 | /// Used to gain exclusive access to the service members, e.g. from CoreTiming thread. | ||
| 125 | Common::SpinLock lock_service; | ||
| 118 | }; | 126 | }; |
| 119 | 127 | ||
| 120 | /** | 128 | /** |
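LockService() returns the std::scoped_lock by value; with C++17 guaranteed copy elision the lock is acquired exactly once and released when the caller's guard leaves scope. A minimal sketch of the idiom, with a hand-rolled spin lock standing in for Common::SpinLock:

#include <atomic>
#include <mutex>

class SpinLock {
public:
    void lock() {
        while (flag.test_and_set(std::memory_order_acquire)) {
            // Busy-wait; a production version would also pause/yield here.
        }
    }
    void unlock() {
        flag.clear(std::memory_order_release);
    }

private:
    std::atomic_flag flag = ATOMIC_FLAG_INIT;
};

class Framework {
protected:
    [[nodiscard]] std::scoped_lock<SpinLock> LockService() {
        return std::scoped_lock{lock_service}; // elided into the caller's local
    }

private:
    SpinLock lock_service;
};

Usage is then "const auto guard = LockService();" at the top of each handler, exactly as in the hunks above.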
diff --git a/src/core/hle/service/sockets/blocking_worker.h b/src/core/hle/service/sockets/blocking_worker.h deleted file mode 100644 index 2d53e52b6..000000000 --- a/src/core/hle/service/sockets/blocking_worker.h +++ /dev/null | |||
| @@ -1,161 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <atomic> | ||
| 8 | #include <memory> | ||
| 9 | #include <string> | ||
| 10 | #include <string_view> | ||
| 11 | #include <thread> | ||
| 12 | #include <variant> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include <fmt/format.h> | ||
| 16 | |||
| 17 | #include "common/assert.h" | ||
| 18 | #include "common/microprofile.h" | ||
| 19 | #include "common/thread.h" | ||
| 20 | #include "core/core.h" | ||
| 21 | #include "core/hle/kernel/hle_ipc.h" | ||
| 22 | #include "core/hle/kernel/kernel.h" | ||
| 23 | #include "core/hle/kernel/thread.h" | ||
| 24 | #include "core/hle/kernel/writable_event.h" | ||
| 25 | |||
| 26 | namespace Service::Sockets { | ||
| 27 | |||
| 28 | /** | ||
| 29 | * Worker abstraction to execute blocking calls on host without blocking the guest thread | ||
| 30 | * | ||
| 31 | * @tparam Service Service where the work is executed | ||
| 32 | * @tparam Types Types of work to execute | ||
| 33 | */ | ||
| 34 | template <class Service, class... Types> | ||
| 35 | class BlockingWorker { | ||
| 36 | using This = BlockingWorker<Service, Types...>; | ||
| 37 | using WorkVariant = std::variant<std::monostate, Types...>; | ||
| 38 | |||
| 39 | public: | ||
| 40 | /// Create a new worker | ||
| 41 | static std::unique_ptr<This> Create(Core::System& system, Service* service, | ||
| 42 | std::string_view name) { | ||
| 43 | return std::unique_ptr<This>(new This(system, service, name)); | ||
| 44 | } | ||
| 45 | |||
| 46 | ~BlockingWorker() { | ||
| 47 | while (!is_available.load(std::memory_order_relaxed)) { | ||
| 48 | // Busy wait until work is finished | ||
| 49 | std::this_thread::yield(); | ||
| 50 | } | ||
| 51 | // Monostate means to exit the thread | ||
| 52 | work = std::monostate{}; | ||
| 53 | work_event.Set(); | ||
| 54 | thread.join(); | ||
| 55 | } | ||
| 56 | |||
| 57 | /** | ||
| 58 | * Try to capture the worker to send work after a success | ||
| 59 | * @returns True when the worker has been successfully captured | ||
| 60 | */ | ||
| 61 | bool TryCapture() { | ||
| 62 | bool expected = true; | ||
| 63 | return is_available.compare_exchange_weak(expected, false, std::memory_order_relaxed, | ||
| 64 | std::memory_order_relaxed); | ||
| 65 | } | ||
| 66 | |||
| 67 | /** | ||
| 68 | * Send work to this worker abstraction | ||
| 69 | * @see TryCapture must be called before attempting to call this function | ||
| 70 | */ | ||
| 71 | template <class Work> | ||
| 72 | void SendWork(Work new_work) { | ||
| 73 | ASSERT_MSG(!is_available, "Trying to send work on a worker that's not captured"); | ||
| 74 | work = std::move(new_work); | ||
| 75 | work_event.Set(); | ||
| 76 | } | ||
| 77 | |||
| 78 | /// Generate a callback for @see SleepClientThread | ||
| 79 | template <class Work> | ||
| 80 | auto Callback() { | ||
| 81 | return [this](std::shared_ptr<Kernel::Thread>, Kernel::HLERequestContext& ctx, | ||
| 82 | Kernel::ThreadWakeupReason reason) { | ||
| 83 | ASSERT(reason == Kernel::ThreadWakeupReason::Signal); | ||
| 84 | std::get<Work>(work).Response(ctx); | ||
| 85 | is_available.store(true); | ||
| 86 | }; | ||
| 87 | } | ||
| 88 | |||
| 89 | /// Get kernel event that will be signalled by the worker when the host operation finishes | ||
| 90 | std::shared_ptr<Kernel::WritableEvent> KernelEvent() const { | ||
| 91 | return kernel_event; | ||
| 92 | } | ||
| 93 | |||
| 94 | private: | ||
| 95 | explicit BlockingWorker(Core::System& system, Service* service, std::string_view name) { | ||
| 96 | auto pair = Kernel::WritableEvent::CreateEventPair(system.Kernel(), std::string(name)); | ||
| 97 | kernel_event = std::move(pair.writable); | ||
| 98 | thread = std::thread([this, &system, service, name] { Run(system, service, name); }); | ||
| 99 | } | ||
| 100 | |||
| 101 | void Run(Core::System& system, Service* service, std::string_view name) { | ||
| 102 | system.RegisterHostThread(); | ||
| 103 | |||
| 104 | const std::string thread_name = fmt::format("yuzu:{}", name); | ||
| 105 | MicroProfileOnThreadCreate(thread_name.c_str()); | ||
| 106 | Common::SetCurrentThreadName(thread_name.c_str()); | ||
| 107 | |||
| 108 | bool keep_running = true; | ||
| 109 | while (keep_running) { | ||
| 110 | work_event.Wait(); | ||
| 111 | |||
| 112 | const auto visit_fn = [service, &keep_running]<typename T>(T&& w) { | ||
| 113 | if constexpr (std::is_same_v<std::decay_t<T>, std::monostate>) { | ||
| 114 | keep_running = false; | ||
| 115 | } else { | ||
| 116 | w.Execute(service); | ||
| 117 | } | ||
| 118 | }; | ||
| 119 | std::visit(visit_fn, work); | ||
| 120 | |||
| 121 | kernel_event->Signal(); | ||
| 122 | } | ||
| 123 | } | ||
| 124 | |||
| 125 | std::thread thread; | ||
| 126 | WorkVariant work; | ||
| 127 | Common::Event work_event; | ||
| 128 | std::shared_ptr<Kernel::WritableEvent> kernel_event; | ||
| 129 | std::atomic_bool is_available{true}; | ||
| 130 | }; | ||
| 131 | |||
| 132 | template <class Service, class... Types> | ||
| 133 | class BlockingWorkerPool { | ||
| 134 | using Worker = BlockingWorker<Service, Types...>; | ||
| 135 | |||
| 136 | public: | ||
| 137 | explicit BlockingWorkerPool(Core::System& system_, Service* service_) | ||
| 138 | : system{system_}, service{service_} {} | ||
| 139 | |||
| 140 | /// Returns a captured worker thread, creating new ones if necessary | ||
| 141 | Worker* CaptureWorker() { | ||
| 142 | for (auto& worker : workers) { | ||
| 143 | if (worker->TryCapture()) { | ||
| 144 | return worker.get(); | ||
| 145 | } | ||
| 146 | } | ||
| 147 | auto new_worker = Worker::Create(system, service, fmt::format("BSD:{}", workers.size())); | ||
| 148 | [[maybe_unused]] const bool success = new_worker->TryCapture(); | ||
| 149 | ASSERT(success); | ||
| 150 | |||
| 151 | return workers.emplace_back(std::move(new_worker)).get(); | ||
| 152 | } | ||
| 153 | |||
| 154 | private: | ||
| 155 | Core::System& system; | ||
| 156 | Service* const service; | ||
| 157 | |||
| 158 | std::vector<std::unique_ptr<Worker>> workers; | ||
| 159 | }; | ||
| 160 | |||
| 161 | } // namespace Service::Sockets | ||
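For reference, the heart of the file deleted above was its dispatch loop: jobs arrive through a std::variant whose std::monostate alternative doubles as the shutdown sentinel. That idiom in isolation, stripped of the kernel and threading machinery it was entangled with:

#include <iostream>
#include <type_traits>
#include <variant>

struct Ping { void Execute() { std::cout << "ping\n"; } };
struct Pong { void Execute() { std::cout << "pong\n"; } };

using Work = std::variant<std::monostate, Ping, Pong>;

// Returns false when the monostate "exit" sentinel is seen.
bool Dispatch(Work& work) {
    bool keep_running = true;
    std::visit([&keep_running]<typename T>(T&& w) {
        if constexpr (std::is_same_v<std::decay_t<T>, std::monostate>) {
            keep_running = false;
        } else {
            w.Execute();
        }
    }, work);
    return keep_running;
}

int main() {
    Work job = Ping{};
    Work stop = std::monostate{};
    Dispatch(job);                 // prints "ping", returns true
    return Dispatch(stop) ? 1 : 0; // monostate: stop the worker loop
}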
diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp index 67b419503..2b824059d 100644 --- a/src/core/hle/service/sockets/bsd.cpp +++ b/src/core/hle/service/sockets/bsd.cpp | |||
| @@ -178,13 +178,12 @@ void BSD::Poll(Kernel::HLERequestContext& ctx) { | |||
| 178 | 178 | ||
| 179 | LOG_DEBUG(Service, "called. nfds={} timeout={}", nfds, timeout); | 179 | LOG_DEBUG(Service, "called. nfds={} timeout={}", nfds, timeout); |
| 180 | 180 | ||
| 181 | ExecuteWork(ctx, "BSD:Poll", timeout != 0, | 181 | ExecuteWork(ctx, PollWork{ |
| 182 | PollWork{ | 182 | .nfds = nfds, |
| 183 | .nfds = nfds, | 183 | .timeout = timeout, |
| 184 | .timeout = timeout, | 184 | .read_buffer = ctx.ReadBuffer(), |
| 185 | .read_buffer = ctx.ReadBuffer(), | 185 | .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()), |
| 186 | .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()), | 186 | }); |
| 187 | }); | ||
| 188 | } | 187 | } |
| 189 | 188 | ||
| 190 | void BSD::Accept(Kernel::HLERequestContext& ctx) { | 189 | void BSD::Accept(Kernel::HLERequestContext& ctx) { |
| @@ -193,11 +192,10 @@ void BSD::Accept(Kernel::HLERequestContext& ctx) { | |||
| 193 | 192 | ||
| 194 | LOG_DEBUG(Service, "called. fd={}", fd); | 193 | LOG_DEBUG(Service, "called. fd={}", fd); |
| 195 | 194 | ||
| 196 | ExecuteWork(ctx, "BSD:Accept", IsBlockingSocket(fd), | 195 | ExecuteWork(ctx, AcceptWork{ |
| 197 | AcceptWork{ | 196 | .fd = fd, |
| 198 | .fd = fd, | 197 | .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()), |
| 199 | .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()), | 198 | }); |
| 200 | }); | ||
| 201 | } | 199 | } |
| 202 | 200 | ||
| 203 | void BSD::Bind(Kernel::HLERequestContext& ctx) { | 201 | void BSD::Bind(Kernel::HLERequestContext& ctx) { |
| @@ -215,11 +213,10 @@ void BSD::Connect(Kernel::HLERequestContext& ctx) { | |||
| 215 | 213 | ||
| 216 | LOG_DEBUG(Service, "called. fd={} addrlen={}", fd, ctx.GetReadBufferSize()); | 214 | LOG_DEBUG(Service, "called. fd={} addrlen={}", fd, ctx.GetReadBufferSize()); |
| 217 | 215 | ||
| 218 | ExecuteWork(ctx, "BSD:Connect", IsBlockingSocket(fd), | 216 | ExecuteWork(ctx, ConnectWork{ |
| 219 | ConnectWork{ | 217 | .fd = fd, |
| 220 | .fd = fd, | 218 | .addr = ctx.ReadBuffer(), |
| 221 | .addr = ctx.ReadBuffer(), | 219 | }); |
| 222 | }); | ||
| 223 | } | 220 | } |
| 224 | 221 | ||
| 225 | void BSD::GetPeerName(Kernel::HLERequestContext& ctx) { | 222 | void BSD::GetPeerName(Kernel::HLERequestContext& ctx) { |
| @@ -327,12 +324,11 @@ void BSD::Recv(Kernel::HLERequestContext& ctx) { | |||
| 327 | 324 | ||
| 328 | LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={}", fd, flags, ctx.GetWriteBufferSize()); | 325 | LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={}", fd, flags, ctx.GetWriteBufferSize()); |
| 329 | 326 | ||
| 330 | ExecuteWork(ctx, "BSD:Recv", IsBlockingSocket(fd), | 327 | ExecuteWork(ctx, RecvWork{ |
| 331 | RecvWork{ | 328 | .fd = fd, |
| 332 | .fd = fd, | 329 | .flags = flags, |
| 333 | .flags = flags, | 330 | .message = std::vector<u8>(ctx.GetWriteBufferSize()), |
| 334 | .message = std::vector<u8>(ctx.GetWriteBufferSize()), | 331 | }); |
| 335 | }); | ||
| 336 | } | 332 | } |
| 337 | 333 | ||
| 338 | void BSD::RecvFrom(Kernel::HLERequestContext& ctx) { | 334 | void BSD::RecvFrom(Kernel::HLERequestContext& ctx) { |
| @@ -344,13 +340,12 @@ void BSD::RecvFrom(Kernel::HLERequestContext& ctx) { | |||
| 344 | LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={} addrlen={}", fd, flags, | 340 | LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={} addrlen={}", fd, flags, |
| 345 | ctx.GetWriteBufferSize(0), ctx.GetWriteBufferSize(1)); | 341 | ctx.GetWriteBufferSize(0), ctx.GetWriteBufferSize(1)); |
| 346 | 342 | ||
| 347 | ExecuteWork(ctx, "BSD:RecvFrom", IsBlockingSocket(fd), | 343 | ExecuteWork(ctx, RecvFromWork{ |
| 348 | RecvFromWork{ | 344 | .fd = fd, |
| 349 | .fd = fd, | 345 | .flags = flags, |
| 350 | .flags = flags, | 346 | .message = std::vector<u8>(ctx.GetWriteBufferSize(0)), |
| 351 | .message = std::vector<u8>(ctx.GetWriteBufferSize(0)), | 347 | .addr = std::vector<u8>(ctx.GetWriteBufferSize(1)), |
| 352 | .addr = std::vector<u8>(ctx.GetWriteBufferSize(1)), | 348 | }); |
| 353 | }); | ||
| 354 | } | 349 | } |
| 355 | 350 | ||
| 356 | void BSD::Send(Kernel::HLERequestContext& ctx) { | 351 | void BSD::Send(Kernel::HLERequestContext& ctx) { |
| @@ -361,12 +356,11 @@ void BSD::Send(Kernel::HLERequestContext& ctx) { | |||
| 361 | 356 | ||
| 362 | LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={}", fd, flags, ctx.GetReadBufferSize()); | 357 | LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={}", fd, flags, ctx.GetReadBufferSize()); |
| 363 | 358 | ||
| 364 | ExecuteWork(ctx, "BSD:Send", IsBlockingSocket(fd), | 359 | ExecuteWork(ctx, SendWork{ |
| 365 | SendWork{ | 360 | .fd = fd, |
| 366 | .fd = fd, | 361 | .flags = flags, |
| 367 | .flags = flags, | 362 | .message = ctx.ReadBuffer(), |
| 368 | .message = ctx.ReadBuffer(), | 363 | }); |
| 369 | }); | ||
| 370 | } | 364 | } |
| 371 | 365 | ||
| 372 | void BSD::SendTo(Kernel::HLERequestContext& ctx) { | 366 | void BSD::SendTo(Kernel::HLERequestContext& ctx) { |
| @@ -377,13 +371,12 @@ void BSD::SendTo(Kernel::HLERequestContext& ctx) { | |||
| 377 | LOG_DEBUG(Service, "called. fd={} flags=0x{} len={} addrlen={}", fd, flags, | 371 | LOG_DEBUG(Service, "called. fd={} flags=0x{} len={} addrlen={}", fd, flags, |
| 378 | ctx.GetReadBufferSize(0), ctx.GetReadBufferSize(1)); | 372 | ctx.GetReadBufferSize(0), ctx.GetReadBufferSize(1)); |
| 379 | 373 | ||
| 380 | ExecuteWork(ctx, "BSD:SendTo", IsBlockingSocket(fd), | 374 | ExecuteWork(ctx, SendToWork{ |
| 381 | SendToWork{ | 375 | .fd = fd, |
| 382 | .fd = fd, | 376 | .flags = flags, |
| 383 | .flags = flags, | 377 | .message = ctx.ReadBuffer(0), |
| 384 | .message = ctx.ReadBuffer(0), | 378 | .addr = ctx.ReadBuffer(1), |
| 385 | .addr = ctx.ReadBuffer(1), | 379 | }); |
| 386 | }); | ||
| 387 | } | 380 | } |
| 388 | 381 | ||
| 389 | void BSD::Write(Kernel::HLERequestContext& ctx) { | 382 | void BSD::Write(Kernel::HLERequestContext& ctx) { |
| @@ -392,12 +385,11 @@ void BSD::Write(Kernel::HLERequestContext& ctx) { | |||
| 392 | 385 | ||
| 393 | LOG_DEBUG(Service, "called. fd={} len={}", fd, ctx.GetReadBufferSize()); | 386 | LOG_DEBUG(Service, "called. fd={} len={}", fd, ctx.GetReadBufferSize()); |
| 394 | 387 | ||
| 395 | ExecuteWork(ctx, "BSD:Write", IsBlockingSocket(fd), | 388 | ExecuteWork(ctx, SendWork{ |
| 396 | SendWork{ | 389 | .fd = fd, |
| 397 | .fd = fd, | 390 | .flags = 0, |
| 398 | .flags = 0, | 391 | .message = ctx.ReadBuffer(), |
| 399 | .message = ctx.ReadBuffer(), | 392 | }); |
| 400 | }); | ||
| 401 | } | 393 | } |
| 402 | 394 | ||
| 403 | void BSD::Close(Kernel::HLERequestContext& ctx) { | 395 | void BSD::Close(Kernel::HLERequestContext& ctx) { |
| @@ -410,24 +402,9 @@ void BSD::Close(Kernel::HLERequestContext& ctx) { | |||
| 410 | } | 402 | } |
| 411 | 403 | ||
| 412 | template <typename Work> | 404 | template <typename Work> |
| 413 | void BSD::ExecuteWork(Kernel::HLERequestContext& ctx, std::string_view sleep_reason, | 405 | void BSD::ExecuteWork(Kernel::HLERequestContext& ctx, Work work) { |
| 414 | bool is_blocking, Work work) { | 406 | work.Execute(this); |
| 415 | if (!is_blocking) { | ||
| 416 | work.Execute(this); | ||
| 417 | work.Response(ctx); | ||
| 418 | return; | ||
| 419 | } | ||
| 420 | |||
| 421 | // Signal a dummy response to make IPC validation happy | ||
| 422 | // This will be overwritten by the SleepClientThread callback | ||
| 423 | work.Response(ctx); | 407 | work.Response(ctx); |
| 424 | |||
| 425 | auto worker = worker_pool.CaptureWorker(); | ||
| 426 | |||
| 427 | ctx.SleepClientThread(std::string(sleep_reason), std::numeric_limits<u64>::max(), | ||
| 428 | worker->Callback<Work>(), worker->KernelEvent()); | ||
| 429 | |||
| 430 | worker->SendWork(std::move(work)); | ||
| 431 | } | 408 | } |
| 432 | 409 | ||
| 433 | std::pair<s32, Errno> BSD::SocketImpl(Domain domain, Type type, Protocol protocol) { | 410 | std::pair<s32, Errno> BSD::SocketImpl(Domain domain, Type type, Protocol protocol) { |
| @@ -807,18 +784,6 @@ bool BSD::IsFileDescriptorValid(s32 fd) const noexcept { | |||
| 807 | return true; | 784 | return true; |
| 808 | } | 785 | } |
| 809 | 786 | ||
| 810 | bool BSD::IsBlockingSocket(s32 fd) const noexcept { | ||
| 811 | // Inform invalid sockets as non-blocking | ||
| 812 | // This way we avoid using a worker thread as it will fail without blocking host | ||
| 813 | if (fd > static_cast<s32>(MAX_FD) || fd < 0) { | ||
| 814 | return false; | ||
| 815 | } | ||
| 816 | if (!file_descriptors[fd]) { | ||
| 817 | return false; | ||
| 818 | } | ||
| 819 | return (file_descriptors[fd]->flags & FLAG_O_NONBLOCK) != 0; | ||
| 820 | } | ||
| 821 | |||
| 822 | void BSD::BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept { | 787 | void BSD::BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept { |
| 823 | IPC::ResponseBuilder rb{ctx, 4}; | 788 | IPC::ResponseBuilder rb{ctx, 4}; |
| 824 | 789 | ||
| @@ -827,8 +792,7 @@ void BSD::BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) co | |||
| 827 | rb.PushEnum(bsd_errno); | 792 | rb.PushEnum(bsd_errno); |
| 828 | } | 793 | } |
| 829 | 794 | ||
| 830 | BSD::BSD(Core::System& system_, const char* name) | 795 | BSD::BSD(Core::System& system_, const char* name) : ServiceFramework{system_, name} { |
| 831 | : ServiceFramework{system_, name}, worker_pool{system_, this} { | ||
| 832 | // clang-format off | 796 | // clang-format off |
| 833 | static const FunctionInfo functions[] = { | 797 | static const FunctionInfo functions[] = { |
| 834 | {0, &BSD::RegisterClient, "RegisterClient"}, | 798 | {0, &BSD::RegisterClient, "RegisterClient"}, |
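With the worker pool gone, ExecuteWork collapses to "run the host call, then reply", and every bsd.cpp call site now builds its work object inline with C++20 designated initializers. The reduced shape, with hypothetical stand-ins for the context and work types:

#include <cstdint>
#include <vector>

struct Context {}; // stands in for Kernel::HLERequestContext

struct RecvWork {
    std::int32_t fd;
    std::uint32_t flags;
    std::vector<std::uint8_t> message;

    void Execute() { /* perform the host socket call */ }
    void Response(Context&) { /* write the IPC reply */ }
};

template <typename Work>
void ExecuteWork(Context& ctx, Work work) {
    work.Execute(); // now runs synchronously on the caller's thread
    work.Response(ctx);
}

void Recv(Context& ctx) {
    ExecuteWork(ctx, RecvWork{
                         .fd = 3,
                         .flags = 0,
                         .message = std::vector<std::uint8_t>(1024),
                     });
}

The trade-off is that a genuinely blocking host socket call now stalls the caller, which the removal of IsBlockingSocket evidently accepts.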
diff --git a/src/core/hle/service/sockets/bsd.h b/src/core/hle/service/sockets/bsd.h index f14713fc4..6da0bfeb2 100644 --- a/src/core/hle/service/sockets/bsd.h +++ b/src/core/hle/service/sockets/bsd.h | |||
| @@ -11,7 +11,6 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "core/hle/kernel/hle_ipc.h" | 12 | #include "core/hle/kernel/hle_ipc.h" |
| 13 | #include "core/hle/service/service.h" | 13 | #include "core/hle/service/service.h" |
| 14 | #include "core/hle/service/sockets/blocking_worker.h" | ||
| 15 | #include "core/hle/service/sockets/sockets.h" | 14 | #include "core/hle/service/sockets/sockets.h" |
| 16 | 15 | ||
| 17 | namespace Core { | 16 | namespace Core { |
| @@ -138,8 +137,7 @@ private: | |||
| 138 | void Close(Kernel::HLERequestContext& ctx); | 137 | void Close(Kernel::HLERequestContext& ctx); |
| 139 | 138 | ||
| 140 | template <typename Work> | 139 | template <typename Work> |
| 141 | void ExecuteWork(Kernel::HLERequestContext& ctx, std::string_view sleep_reason, | 140 | void ExecuteWork(Kernel::HLERequestContext& ctx, Work work); |
| 142 | bool is_blocking, Work work); | ||
| 143 | 141 | ||
| 144 | std::pair<s32, Errno> SocketImpl(Domain domain, Type type, Protocol protocol); | 142 | std::pair<s32, Errno> SocketImpl(Domain domain, Type type, Protocol protocol); |
| 145 | std::pair<s32, Errno> PollImpl(std::vector<u8>& write_buffer, std::vector<u8> read_buffer, | 143 | std::pair<s32, Errno> PollImpl(std::vector<u8>& write_buffer, std::vector<u8> read_buffer, |
| @@ -163,15 +161,10 @@ private: | |||
| 163 | 161 | ||
| 164 | s32 FindFreeFileDescriptorHandle() noexcept; | 162 | s32 FindFreeFileDescriptorHandle() noexcept; |
| 165 | bool IsFileDescriptorValid(s32 fd) const noexcept; | 163 | bool IsFileDescriptorValid(s32 fd) const noexcept; |
| 166 | bool IsBlockingSocket(s32 fd) const noexcept; | ||
| 167 | 164 | ||
| 168 | void BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept; | 165 | void BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept; |
| 169 | 166 | ||
| 170 | std::array<std::optional<FileDescriptor>, MAX_FD> file_descriptors; | 167 | std::array<std::optional<FileDescriptor>, MAX_FD> file_descriptors; |
| 171 | |||
| 172 | BlockingWorkerPool<BSD, PollWork, AcceptWork, ConnectWork, RecvWork, RecvFromWork, SendWork, | ||
| 173 | SendToWork> | ||
| 174 | worker_pool; | ||
| 175 | }; | 168 | }; |
| 176 | 169 | ||
| 177 | class BSDCFG final : public ServiceFramework<BSDCFG> { | 170 | class BSDCFG final : public ServiceFramework<BSDCFG> { |
diff --git a/src/core/hle/service/sockets/sockets_translate.cpp b/src/core/hle/service/sockets/sockets_translate.cpp index c822d21b8..ca61d72ca 100644 --- a/src/core/hle/service/sockets/sockets_translate.cpp +++ b/src/core/hle/service/sockets/sockets_translate.cpp | |||
| @@ -64,6 +64,7 @@ Network::Type Translate(Type type) { | |||
| 64 | return Network::Type::DGRAM; | 64 | return Network::Type::DGRAM; |
| 65 | default: | 65 | default: |
| 66 | UNIMPLEMENTED_MSG("Unimplemented type={}", type); | 66 | UNIMPLEMENTED_MSG("Unimplemented type={}", type); |
| 67 | return Network::Type{}; | ||
| 67 | } | 68 | } |
| 68 | } | 69 | } |
| 69 | 70 | ||
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index 45cfffe06..968cd16b6 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp | |||
| @@ -536,8 +536,7 @@ private: | |||
| 536 | LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, | 536 | LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, |
| 537 | transaction, flags); | 537 | transaction, flags); |
| 538 | 538 | ||
| 539 | const auto guard = nv_flinger.Lock(); | 539 | auto& buffer_queue = *nv_flinger.FindBufferQueue(id); |
| 540 | auto& buffer_queue = nv_flinger.FindBufferQueue(id); | ||
| 541 | 540 | ||
| 542 | switch (transaction) { | 541 | switch (transaction) { |
| 543 | case TransactionId::Connect: { | 542 | case TransactionId::Connect: { |
| @@ -547,6 +546,9 @@ private: | |||
| 547 | Settings::values.resolution_factor.GetValue()), | 546 | Settings::values.resolution_factor.GetValue()), |
| 548 | static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) * | 547 | static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) * |
| 549 | Settings::values.resolution_factor.GetValue())}; | 548 | Settings::values.resolution_factor.GetValue())}; |
| 549 | |||
| 550 | buffer_queue.Connect(); | ||
| 551 | |||
| 550 | ctx.WriteBuffer(response.Serialize()); | 552 | ctx.WriteBuffer(response.Serialize()); |
| 551 | break; | 553 | break; |
| 552 | } | 554 | } |
| @@ -563,40 +565,25 @@ private: | |||
| 563 | IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; | 565 | IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; |
| 564 | const u32 width{request.data.width}; | 566 | const u32 width{request.data.width}; |
| 565 | const u32 height{request.data.height}; | 567 | const u32 height{request.data.height}; |
| 566 | auto result = buffer_queue.DequeueBuffer(width, height); | 568 | |
| 567 | 569 | do { | |
| 568 | if (result) { | 570 | if (auto result = buffer_queue.DequeueBuffer(width, height); result) { |
| 569 | // Buffer is available | 571 | // Buffer is available |
| 570 | IGBPDequeueBufferResponseParcel response{result->first, *result->second}; | 572 | IGBPDequeueBufferResponseParcel response{result->first, *result->second}; |
| 571 | ctx.WriteBuffer(response.Serialize()); | 573 | ctx.WriteBuffer(response.Serialize()); |
| 572 | } else { | 574 | break; |
| 573 | // Wait the current thread until a buffer becomes available | 575 | } |
| 574 | ctx.SleepClientThread( | 576 | } while (buffer_queue.IsConnected()); |
| 575 | "IHOSBinderDriver::DequeueBuffer", UINT64_MAX, | 577 | |
| 576 | [=, this](std::shared_ptr<Kernel::Thread> thread, | ||
| 577 | Kernel::HLERequestContext& ctx, Kernel::ThreadWakeupReason reason) { | ||
| 578 | // Repeat TransactParcel DequeueBuffer when a buffer is available | ||
| 579 | const auto guard = nv_flinger.Lock(); | ||
| 580 | auto& buffer_queue = nv_flinger.FindBufferQueue(id); | ||
| 581 | auto result = buffer_queue.DequeueBuffer(width, height); | ||
| 582 | ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer."); | ||
| 583 | |||
| 584 | IGBPDequeueBufferResponseParcel response{result->first, *result->second}; | ||
| 585 | ctx.WriteBuffer(response.Serialize()); | ||
| 586 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 587 | rb.Push(RESULT_SUCCESS); | ||
| 588 | }, | ||
| 589 | buffer_queue.GetWritableBufferWaitEvent()); | ||
| 590 | } | ||
| 591 | break; | 578 | break; |
| 592 | } | 579 | } |
| 593 | case TransactionId::RequestBuffer: { | 580 | case TransactionId::RequestBuffer: { |
| 594 | IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; | 581 | IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; |
| 595 | 582 | ||
| 596 | auto& buffer = buffer_queue.RequestBuffer(request.slot); | 583 | auto& buffer = buffer_queue.RequestBuffer(request.slot); |
| 597 | |||
| 598 | IGBPRequestBufferResponseParcel response{buffer}; | 584 | IGBPRequestBufferResponseParcel response{buffer}; |
| 599 | ctx.WriteBuffer(response.Serialize()); | 585 | ctx.WriteBuffer(response.Serialize()); |
| 586 | |||
| 600 | break; | 587 | break; |
| 601 | } | 588 | } |
| 602 | case TransactionId::QueueBuffer: { | 589 | case TransactionId::QueueBuffer: { |
| @@ -682,7 +669,7 @@ private: | |||
| 682 | 669 | ||
| 683 | LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown); | 670 | LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown); |
| 684 | 671 | ||
| 685 | const auto& buffer_queue = nv_flinger.FindBufferQueue(id); | 672 | const auto& buffer_queue = *nv_flinger.FindBufferQueue(id); |
| 686 | 673 | ||
| 687 | // TODO(Subv): Find out what this actually is. | 674 | // TODO(Subv): Find out what this actually is. |
| 688 | IPC::ResponseBuilder rb{ctx, 2, 1}; | 675 | IPC::ResponseBuilder rb{ctx, 2, 1}; |
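The DequeueBuffer transaction above stops parking the guest thread in SleepClientThread and instead polls in a do/while that gives up once the queue disconnects. The control flow in miniature, with an assumed toy Queue whose state a producer elsewhere would be mutating:

#include <optional>

struct Queue {
    std::optional<int> slot;  // filled by a producer on another thread
    bool connected = true;    // cleared when the producer goes away

    std::optional<int> DequeueBuffer() {
        auto result = slot;
        slot.reset();
        return result;
    }
    bool IsConnected() const { return connected; }
};

// Returns the dequeued slot, or nullopt if the queue disconnected first.
// Like the real transaction, this spins while the queue stays connected
// and empty, relying on another thread to change one of the two states.
std::optional<int> DequeueOrGiveUp(Queue& queue) {
    do {
        if (auto result = queue.DequeueBuffer(); result) {
            return result; // a buffer became available
        }
    } while (queue.IsConnected());
    return std::nullopt;
}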
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp index d91c15561..e4f5fd40c 100644 --- a/src/core/loader/loader.cpp +++ b/src/core/loader/loader.cpp | |||
| @@ -185,6 +185,10 @@ constexpr std::array<const char*, 66> RESULT_MESSAGES{ | |||
| 185 | "The INI file contains more than the maximum allowable number of KIP files.", | 185 | "The INI file contains more than the maximum allowable number of KIP files.", |
| 186 | }; | 186 | }; |
| 187 | 187 | ||
| 188 | std::string GetResultStatusString(ResultStatus status) { | ||
| 189 | return RESULT_MESSAGES.at(static_cast<std::size_t>(status)); | ||
| 190 | } | ||
| 191 | |||
| 188 | std::ostream& operator<<(std::ostream& os, ResultStatus status) { | 192 | std::ostream& operator<<(std::ostream& os, ResultStatus status) { |
| 189 | os << RESULT_MESSAGES.at(static_cast<std::size_t>(status)); | 193 | os << RESULT_MESSAGES.at(static_cast<std::size_t>(status)); |
| 190 | return os; | 194 | return os; |
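GetResultStatusString exposes the same table lookup the stream operator already performs, handy for call sites (a GUI error dialog, say) that want a std::string without an ostringstream detour. Roughly, with a truncated stand-in table:

#include <array>
#include <cstddef>
#include <string>

enum class ResultStatus : unsigned short { Success, ErrorAlreadyLoaded };

constexpr std::array<const char*, 2> RESULT_MESSAGES{
    "The operation completed successfully.",
    "The module has already been loaded.",
};

std::string GetResultStatusString(ResultStatus status) {
    // .at() throws std::out_of_range on an out-of-table status instead of
    // silently reading past the array.
    return RESULT_MESSAGES.at(static_cast<std::size_t>(status));
}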
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h index 36e79e71d..b2e5b13de 100644 --- a/src/core/loader/loader.h +++ b/src/core/loader/loader.h | |||
| @@ -135,6 +135,7 @@ enum class ResultStatus : u16 { | |||
| 135 | ErrorINITooManyKIPs, | 135 | ErrorINITooManyKIPs, |
| 136 | }; | 136 | }; |
| 137 | 137 | ||
| 138 | std::string GetResultStatusString(ResultStatus status); | ||
| 138 | std::ostream& operator<<(std::ostream& os, ResultStatus status); | 139 | std::ostream& operator<<(std::ostream& os, ResultStatus status); |
| 139 | 140 | ||
| 140 | /// Interface for loading an application | 141 | /// Interface for loading an application |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 54a848936..11609682a 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -4,7 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include <mutex> | ||
| 8 | #include <optional> | 7 | #include <optional> |
| 9 | #include <utility> | 8 | #include <utility> |
| 10 | 9 | ||
| @@ -45,44 +44,16 @@ struct Memory::Impl { | |||
| 45 | MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory); | 44 | MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory); |
| 46 | } | 45 | } |
| 47 | 46 | ||
| 48 | void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size, | ||
| 49 | Common::MemoryHookPointer mmio_handler) { | ||
| 50 | UNIMPLEMENTED(); | ||
| 51 | } | ||
| 52 | |||
| 53 | void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { | 47 | void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { |
| 54 | ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); | 48 | ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); |
| 55 | ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); | 49 | ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); |
| 56 | MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped); | 50 | MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped); |
| 57 | } | 51 | } |
| 58 | 52 | ||
| 59 | void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size, | ||
| 60 | Common::MemoryHookPointer hook) { | ||
| 61 | UNIMPLEMENTED(); | ||
| 62 | } | ||
| 63 | |||
| 64 | void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size, | ||
| 65 | Common::MemoryHookPointer hook) { | ||
| 66 | UNIMPLEMENTED(); | ||
| 67 | } | ||
| 68 | |||
| 69 | bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) const { | 53 | bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) const { |
| 70 | const auto& page_table = process.PageTable().PageTableImpl(); | 54 | const auto& page_table = process.PageTable().PageTableImpl(); |
| 71 | 55 | const auto [pointer, type] = page_table.pointers[vaddr >> PAGE_BITS].PointerType(); | |
| 72 | const u8* const page_pointer = page_table.pointers[vaddr >> PAGE_BITS]; | 56 | return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory; |
| 73 | if (page_pointer != nullptr) { | ||
| 74 | return true; | ||
| 75 | } | ||
| 76 | |||
| 77 | if (page_table.attributes[vaddr >> PAGE_BITS] == Common::PageType::RasterizerCachedMemory) { | ||
| 78 | return true; | ||
| 79 | } | ||
| 80 | |||
| 81 | if (page_table.attributes[vaddr >> PAGE_BITS] != Common::PageType::Special) { | ||
| 82 | return false; | ||
| 83 | } | ||
| 84 | |||
| 85 | return false; | ||
| 86 | } | 57 | } |
| 87 | 58 | ||
| 88 | bool IsValidVirtualAddress(VAddr vaddr) const { | 59 | bool IsValidVirtualAddress(VAddr vaddr) const { |
| @@ -100,17 +71,15 @@ struct Memory::Impl { | |||
| 100 | } | 71 | } |
| 101 | 72 | ||
| 102 | u8* GetPointer(const VAddr vaddr) const { | 73 | u8* GetPointer(const VAddr vaddr) const { |
| 103 | u8* const page_pointer{current_page_table->pointers[vaddr >> PAGE_BITS]}; | 74 | const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw(); |
| 104 | if (page_pointer) { | 75 | if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { |
| 105 | return page_pointer + vaddr; | 76 | return pointer + vaddr; |
| 106 | } | 77 | } |
| 107 | 78 | const auto type = Common::PageTable::PageInfo::ExtractType(raw_pointer); | |
| 108 | if (current_page_table->attributes[vaddr >> PAGE_BITS] == | 79 | if (type == Common::PageType::RasterizerCachedMemory) { |
| 109 | Common::PageType::RasterizerCachedMemory) { | ||
| 110 | return GetPointerFromRasterizerCachedMemory(vaddr); | 80 | return GetPointerFromRasterizerCachedMemory(vaddr); |
| 111 | } | 81 | } |
| 112 | 82 | return nullptr; | |
| 113 | return {}; | ||
| 114 | } | 83 | } |
| 115 | 84 | ||
| 116 | u8 Read8(const VAddr addr) { | 85 | u8 Read8(const VAddr addr) { |
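Everything in these memory.cpp hunks leans on a reworked Common::PageTable entry that packs the host pointer and the Common::PageType into a single word, with static helpers to split a raw value back apart. A self-contained sketch of that packing, assuming page-aligned stored pointers leave the low two bits free for the tag (the real PageInfo additionally wraps the word in an atomic, omitted here for brevity):

#include <cstdint>

enum class PageType : std::uintptr_t { Unmapped = 0, Memory = 1, RasterizerCachedMemory = 2 };

class PageInfo {
public:
    static constexpr std::uintptr_t TYPE_MASK = 3; // low two bits carry the type

    void Store(std::uint8_t* pointer, PageType type) {
        raw = reinterpret_cast<std::uintptr_t>(pointer) | static_cast<std::uintptr_t>(type);
    }

    std::uint8_t* Pointer() const { return ExtractPointer(raw); }
    PageType Type() const { return ExtractType(raw); }
    std::uintptr_t Raw() const { return raw; }

    struct PointerTypePair {
        std::uint8_t* pointer;
        PageType type;
    };
    PointerTypePair PointerType() const { return {Pointer(), Type()}; }

    static std::uint8_t* ExtractPointer(std::uintptr_t value) {
        return reinterpret_cast<std::uint8_t*>(value & ~TYPE_MASK);
    }
    static PageType ExtractType(std::uintptr_t value) {
        return static_cast<PageType>(value & TYPE_MASK);
    }

private:
    std::uintptr_t raw = 0;
};

A single load of Raw() then feeds both the fast-path pointer test and the slow-path type switch, which is what lets the old rasterizer-cache mutex and the separate attributes array disappear in the hunks below.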
| @@ -222,7 +191,8 @@ struct Memory::Impl { | |||
| 222 | std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); | 191 | std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); |
| 223 | const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); | 192 | const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); |
| 224 | 193 | ||
| 225 | switch (page_table.attributes[page_index]) { | 194 | const auto [pointer, type] = page_table.pointers[page_index].PointerType(); |
| 195 | switch (type) { | ||
| 226 | case Common::PageType::Unmapped: { | 196 | case Common::PageType::Unmapped: { |
| 227 | LOG_ERROR(HW_Memory, | 197 | LOG_ERROR(HW_Memory, |
| 228 | "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", | 198 | "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", |
| @@ -231,10 +201,8 @@ struct Memory::Impl { | |||
| 231 | break; | 201 | break; |
| 232 | } | 202 | } |
| 233 | case Common::PageType::Memory: { | 203 | case Common::PageType::Memory: { |
| 234 | DEBUG_ASSERT(page_table.pointers[page_index]); | 204 | DEBUG_ASSERT(pointer); |
| 235 | 205 | const u8* const src_ptr = pointer + page_offset + (page_index << PAGE_BITS); | |
| 236 | const u8* const src_ptr = | ||
| 237 | page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); | ||
| 238 | std::memcpy(dest_buffer, src_ptr, copy_amount); | 206 | std::memcpy(dest_buffer, src_ptr, copy_amount); |
| 239 | break; | 207 | break; |
| 240 | } | 208 | } |
| @@ -268,7 +236,8 @@ struct Memory::Impl { | |||
| 268 | std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); | 236 | std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); |
| 269 | const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); | 237 | const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); |
| 270 | 238 | ||
| 271 | switch (page_table.attributes[page_index]) { | 239 | const auto [pointer, type] = page_table.pointers[page_index].PointerType(); |
| 240 | switch (type) { | ||
| 272 | case Common::PageType::Unmapped: { | 241 | case Common::PageType::Unmapped: { |
| 273 | LOG_ERROR(HW_Memory, | 242 | LOG_ERROR(HW_Memory, |
| 274 | "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", | 243 | "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", |
| @@ -277,10 +246,8 @@ struct Memory::Impl { | |||
| 277 | break; | 246 | break; |
| 278 | } | 247 | } |
| 279 | case Common::PageType::Memory: { | 248 | case Common::PageType::Memory: { |
| 280 | DEBUG_ASSERT(page_table.pointers[page_index]); | 249 | DEBUG_ASSERT(pointer); |
| 281 | 250 | const u8* const src_ptr = pointer + page_offset + (page_index << PAGE_BITS); | |
| 282 | const u8* const src_ptr = | ||
| 283 | page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); | ||
| 284 | std::memcpy(dest_buffer, src_ptr, copy_amount); | 251 | std::memcpy(dest_buffer, src_ptr, copy_amount); |
| 285 | break; | 252 | break; |
| 286 | } | 253 | } |
| @@ -320,7 +287,8 @@ struct Memory::Impl { | |||
| 320 | std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); | 287 | std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); |
| 321 | const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); | 288 | const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); |
| 322 | 289 | ||
| 323 | switch (page_table.attributes[page_index]) { | 290 | const auto [pointer, type] = page_table.pointers[page_index].PointerType(); |
| 291 | switch (type) { | ||
| 324 | case Common::PageType::Unmapped: { | 292 | case Common::PageType::Unmapped: { |
| 325 | LOG_ERROR(HW_Memory, | 293 | LOG_ERROR(HW_Memory, |
| 326 | "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", | 294 | "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", |
| @@ -328,10 +296,8 @@ struct Memory::Impl { | |||
| 328 | break; | 296 | break; |
| 329 | } | 297 | } |
| 330 | case Common::PageType::Memory: { | 298 | case Common::PageType::Memory: { |
| 331 | DEBUG_ASSERT(page_table.pointers[page_index]); | 299 | DEBUG_ASSERT(pointer); |
| 332 | 300 | u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS); | |
| 333 | u8* const dest_ptr = | ||
| 334 | page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); | ||
| 335 | std::memcpy(dest_ptr, src_buffer, copy_amount); | 301 | std::memcpy(dest_ptr, src_buffer, copy_amount); |
| 336 | break; | 302 | break; |
| 337 | } | 303 | } |
| @@ -364,7 +330,8 @@ struct Memory::Impl { | |||
| 364 | std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); | 330 | std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); |
| 365 | const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); | 331 | const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); |
| 366 | 332 | ||
| 367 | switch (page_table.attributes[page_index]) { | 333 | const auto [pointer, type] = page_table.pointers[page_index].PointerType(); |
| 334 | switch (type) { | ||
| 368 | case Common::PageType::Unmapped: { | 335 | case Common::PageType::Unmapped: { |
| 369 | LOG_ERROR(HW_Memory, | 336 | LOG_ERROR(HW_Memory, |
| 370 | "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", | 337 | "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", |
| @@ -372,10 +339,8 @@ struct Memory::Impl { | |||
| 372 | break; | 339 | break; |
| 373 | } | 340 | } |
| 374 | case Common::PageType::Memory: { | 341 | case Common::PageType::Memory: { |
| 375 | DEBUG_ASSERT(page_table.pointers[page_index]); | 342 | DEBUG_ASSERT(pointer); |
| 376 | 343 | u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS); | |
| 377 | u8* const dest_ptr = | ||
| 378 | page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); | ||
| 379 | std::memcpy(dest_ptr, src_buffer, copy_amount); | 344 | std::memcpy(dest_ptr, src_buffer, copy_amount); |
| 380 | break; | 345 | break; |
| 381 | } | 346 | } |
| @@ -414,7 +379,8 @@ struct Memory::Impl { | |||
| 414 | std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); | 379 | std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); |
| 415 | const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); | 380 | const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); |
| 416 | 381 | ||
| 417 | switch (page_table.attributes[page_index]) { | 382 | const auto [pointer, type] = page_table.pointers[page_index].PointerType(); |
| 383 | switch (type) { | ||
| 418 | case Common::PageType::Unmapped: { | 384 | case Common::PageType::Unmapped: { |
| 419 | LOG_ERROR(HW_Memory, | 385 | LOG_ERROR(HW_Memory, |
| 420 | "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", | 386 | "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", |
| @@ -422,10 +388,8 @@ struct Memory::Impl { | |||
| 422 | break; | 388 | break; |
| 423 | } | 389 | } |
| 424 | case Common::PageType::Memory: { | 390 | case Common::PageType::Memory: { |
| 425 | DEBUG_ASSERT(page_table.pointers[page_index]); | 391 | DEBUG_ASSERT(pointer); |
| 426 | 392 | u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS); | |
| 427 | u8* dest_ptr = | ||
| 428 | page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); | ||
| 429 | std::memset(dest_ptr, 0, copy_amount); | 393 | std::memset(dest_ptr, 0, copy_amount); |
| 430 | break; | 394 | break; |
| 431 | } | 395 | } |
| @@ -461,7 +425,8 @@ struct Memory::Impl { | |||
| 461 | std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); | 425 | std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); |
| 462 | const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); | 426 | const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); |
| 463 | 427 | ||
| 464 | switch (page_table.attributes[page_index]) { | 428 | const auto [pointer, type] = page_table.pointers[page_index].PointerType(); |
| 429 | switch (type) { | ||
| 465 | case Common::PageType::Unmapped: { | 430 | case Common::PageType::Unmapped: { |
| 466 | LOG_ERROR(HW_Memory, | 431 | LOG_ERROR(HW_Memory, |
| 467 | "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", | 432 | "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", |
| @@ -470,9 +435,8 @@ struct Memory::Impl { | |||
| 470 | break; | 435 | break; |
| 471 | } | 436 | } |
| 472 | case Common::PageType::Memory: { | 437 | case Common::PageType::Memory: { |
| 473 | DEBUG_ASSERT(page_table.pointers[page_index]); | 438 | DEBUG_ASSERT(pointer); |
| 474 | const u8* src_ptr = | 439 | const u8* src_ptr = pointer + page_offset + (page_index << PAGE_BITS); |
| 475 | page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); | ||
| 476 | WriteBlock(process, dest_addr, src_ptr, copy_amount); | 440 | WriteBlock(process, dest_addr, src_ptr, copy_amount); |
| 477 | break; | 441 | break; |
| 478 | } | 442 | } |
| @@ -498,34 +462,19 @@ struct Memory::Impl { | |||
| 498 | return CopyBlock(*system.CurrentProcess(), dest_addr, src_addr, size); | 462 | return CopyBlock(*system.CurrentProcess(), dest_addr, src_addr, size); |
| 499 | } | 463 | } |
| 500 | 464 | ||
| 501 | struct PageEntry { | ||
| 502 | u8* const pointer; | ||
| 503 | const Common::PageType attribute; | ||
| 504 | }; | ||
| 505 | |||
| 506 | PageEntry SafePageEntry(std::size_t base) const { | ||
| 507 | std::lock_guard lock{rasterizer_cache_guard}; | ||
| 508 | return { | ||
| 509 | .pointer = current_page_table->pointers[base], | ||
| 510 | .attribute = current_page_table->attributes[base], | ||
| 511 | }; | ||
| 512 | } | ||
| 513 | |||
| 514 | void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) { | 465 | void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) { |
| 515 | std::lock_guard lock{rasterizer_cache_guard}; | ||
| 516 | if (vaddr == 0) { | 466 | if (vaddr == 0) { |
| 517 | return; | 467 | return; |
| 518 | } | 468 | } |
| 519 | |||
| 520 | // Iterate over a contiguous CPU address space, which corresponds to the specified GPU | 469 | // Iterate over a contiguous CPU address space, which corresponds to the specified GPU |
| 521 | // address space, marking the region as un/cached. The region is marked un/cached at a | 470 | // address space, marking the region as un/cached. The region is marked un/cached at a |
| 522 | // granularity of CPU pages, hence why we iterate on a CPU page basis (note: GPU page size | 471 | // granularity of CPU pages, hence why we iterate on a CPU page basis (note: GPU page size |
| 523 | // is different). This assumes the specified GPU address region is contiguous as well. | 472 | // is different). This assumes the specified GPU address region is contiguous as well. |
| 524 | 473 | ||
| 525 | u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1; | 474 | const u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1; |
| 526 | for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) { | 475 | for (u64 i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) { |
| 527 | Common::PageType& page_type{current_page_table->attributes[vaddr >> PAGE_BITS]}; | 476 | const Common::PageType page_type{ |
| 528 | 477 | current_page_table->pointers[vaddr >> PAGE_BITS].Type()}; | |
| 529 | if (cached) { | 478 | if (cached) { |
| 530 | // Switch page type to cached if now cached | 479 | // Switch page type to cached if now cached |
| 531 | switch (page_type) { | 480 | switch (page_type) { |
| @@ -534,8 +483,8 @@ struct Memory::Impl { | |||
| 534 | // space, for example, a system module need not have a VRAM mapping. | 483 | // space, for example, a system module need not have a VRAM mapping. |
| 535 | break; | 484 | break; |
| 536 | case Common::PageType::Memory: | 485 | case Common::PageType::Memory: |
| 537 | page_type = Common::PageType::RasterizerCachedMemory; | 486 | current_page_table->pointers[vaddr >> PAGE_BITS].Store( |
| 538 | current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr; | 487 | nullptr, Common::PageType::RasterizerCachedMemory); |
| 539 | break; | 488 | break; |
| 540 | case Common::PageType::RasterizerCachedMemory: | 489 | case Common::PageType::RasterizerCachedMemory: |
| 541 | // There can be more than one GPU region mapped per CPU region, so it's common | 490 | // There can be more than one GPU region mapped per CPU region, so it's common |
| @@ -556,16 +505,16 @@ struct Memory::Impl { | |||
| 556 | // that this area is already unmarked as cached. | 505 | // that this area is already unmarked as cached. |
| 557 | break; | 506 | break; |
| 558 | case Common::PageType::RasterizerCachedMemory: { | 507 | case Common::PageType::RasterizerCachedMemory: { |
| 559 | u8* pointer{GetPointerFromRasterizerCachedMemory(vaddr & ~PAGE_MASK)}; | 508 | u8* const pointer{GetPointerFromRasterizerCachedMemory(vaddr & ~PAGE_MASK)}; |
| 560 | if (pointer == nullptr) { | 509 | if (pointer == nullptr) { |
| 561 | // It's possible that this function has been called while updating the | 510 | // It's possible that this function has been called while updating the |
| 562 | // pagetable after unmapping a VMA. In that case the underlying VMA will no | 511 | // pagetable after unmapping a VMA. In that case the underlying VMA will no |
| 563 | // longer exist, and we should just leave the pagetable entry blank. | 512 | // longer exist, and we should just leave the pagetable entry blank. |
| 564 | page_type = Common::PageType::Unmapped; | 513 | current_page_table->pointers[vaddr >> PAGE_BITS].Store( |
| 514 | nullptr, Common::PageType::Unmapped); | ||
| 565 | } else { | 515 | } else { |
| 566 | current_page_table->pointers[vaddr >> PAGE_BITS] = | 516 | current_page_table->pointers[vaddr >> PAGE_BITS].Store( |
| 567 | pointer - (vaddr & ~PAGE_MASK); | 517 | pointer - (vaddr & ~PAGE_MASK), Common::PageType::Memory); |
| 568 | page_type = Common::PageType::Memory; | ||
| 569 | } | 518 | } |
| 570 | break; | 519 | break; |
| 571 | } | 520 | } |
| @@ -595,7 +544,7 @@ struct Memory::Impl { | |||
| 595 | auto& gpu = system.GPU(); | 544 | auto& gpu = system.GPU(); |
| 596 | for (u64 i = 0; i < size; i++) { | 545 | for (u64 i = 0; i < size; i++) { |
| 597 | const auto page = base + i; | 546 | const auto page = base + i; |
| 598 | if (page_table.attributes[page] == Common::PageType::RasterizerCachedMemory) { | 547 | if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) { |
| 599 | gpu.FlushAndInvalidateRegion(page << PAGE_BITS, PAGE_SIZE); | 548 | gpu.FlushAndInvalidateRegion(page << PAGE_BITS, PAGE_SIZE); |
| 600 | } | 549 | } |
| 601 | } | 550 | } |
| @@ -610,20 +559,18 @@ struct Memory::Impl { | |||
| 610 | "Mapping memory page without a pointer @ {:016x}", base * PAGE_SIZE); | 559 | "Mapping memory page without a pointer @ {:016x}", base * PAGE_SIZE); |
| 611 | 560 | ||
| 612 | while (base != end) { | 561 | while (base != end) { |
| 613 | page_table.attributes[base] = type; | 562 | page_table.pointers[base].Store(nullptr, type); |
| 614 | page_table.pointers[base] = nullptr; | ||
| 615 | page_table.backing_addr[base] = 0; | 563 | page_table.backing_addr[base] = 0; |
| 616 | 564 | ||
| 617 | base += 1; | 565 | base += 1; |
| 618 | } | 566 | } |
| 619 | } else { | 567 | } else { |
| 620 | while (base != end) { | 568 | while (base != end) { |
| 621 | page_table.pointers[base] = | 569 | page_table.pointers[base].Store( |
| 622 | system.DeviceMemory().GetPointer(target) - (base << PAGE_BITS); | 570 | system.DeviceMemory().GetPointer(target) - (base << PAGE_BITS), type); |
| 623 | page_table.attributes[base] = type; | ||
| 624 | page_table.backing_addr[base] = target - (base << PAGE_BITS); | 571 | page_table.backing_addr[base] = target - (base << PAGE_BITS); |
| 625 | 572 | ||
| 626 | ASSERT_MSG(page_table.pointers[base], | 573 | ASSERT_MSG(page_table.pointers[base].Pointer(), |
| 627 | "memory mapping base yield a nullptr within the table"); | 574 | "memory mapping base yield a nullptr within the table"); |
| 628 | 575 | ||
| 629 | base += 1; | 576 | base += 1; |
| @@ -646,21 +593,13 @@ struct Memory::Impl { | |||
| 646 | template <typename T> | 593 | template <typename T> |
| 647 | T Read(const VAddr vaddr) { | 594 | T Read(const VAddr vaddr) { |
| 648 | // Avoid adding any extra logic to this fast-path block | 595 | // Avoid adding any extra logic to this fast-path block |
| 649 | if (const u8* const pointer = current_page_table->pointers[vaddr >> PAGE_BITS]) { | 596 | const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw(); |
| 597 | if (const u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { | ||
| 650 | T value; | 598 | T value; |
| 651 | std::memcpy(&value, &pointer[vaddr], sizeof(T)); | 599 | std::memcpy(&value, &pointer[vaddr], sizeof(T)); |
| 652 | return value; | 600 | return value; |
| 653 | } | 601 | } |
| 654 | 602 | switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) { | |
| 655 | // Otherwise, we need to grab the page with a lock, in case it is currently being modified | ||
| 656 | const auto entry = SafePageEntry(vaddr >> PAGE_BITS); | ||
| 657 | if (entry.pointer) { | ||
| 658 | T value; | ||
| 659 | std::memcpy(&value, &entry.pointer[vaddr], sizeof(T)); | ||
| 660 | return value; | ||
| 661 | } | ||
| 662 | |||
| 663 | switch (entry.attribute) { | ||
| 664 | case Common::PageType::Unmapped: | 603 | case Common::PageType::Unmapped: |
| 665 | LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr); | 604 | LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr); |
| 666 | return 0; | 605 | return 0; |
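The Read fast path above now pulls a single raw word per page entry and splits it into a host pointer and a Common::PageType. The diff does not show Common::PageTable::PageInfo itself, so the following is only a sketch of one plausible tagged-pointer layout (type bits stored in the low alignment bits of the pointer); the real layout may differ:

    #include <cstdint>

    enum class PageType : std::uintptr_t { Unmapped = 0, Memory = 1, RasterizerCachedMemory = 2 };

    struct PageInfo {
        static constexpr std::uintptr_t TYPE_MASK = 3; // assumes >= 4-byte pointer alignment

        std::uintptr_t raw = 0;

        void Store(std::uint8_t* pointer, PageType type) {
            raw = reinterpret_cast<std::uintptr_t>(pointer) | static_cast<std::uintptr_t>(type);
        }
        std::uintptr_t Raw() const { return raw; }
        std::uint8_t* Pointer() const { return ExtractPointer(raw); }
        PageType Type() const { return ExtractType(raw); }

        static std::uint8_t* ExtractPointer(std::uintptr_t r) {
            return reinterpret_cast<std::uint8_t*>(r & ~TYPE_MASK);
        }
        static PageType ExtractType(std::uintptr_t r) {
            return static_cast<PageType>(r & TYPE_MASK);
        }
    };

Packing both fields into one word lets the fast path answer "is this page directly backed?" with a single load, which is the point of dropping the separate attributes array and the page-entry mutex.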
| @@ -692,20 +631,12 @@ struct Memory::Impl { | |||
| 692 | template <typename T> | 631 | template <typename T> |
| 693 | void Write(const VAddr vaddr, const T data) { | 632 | void Write(const VAddr vaddr, const T data) { |
| 694 | // Avoid adding any extra logic to this fast-path block | 633 | // Avoid adding any extra logic to this fast-path block |
| 695 | if (u8* const pointer = current_page_table->pointers[vaddr >> PAGE_BITS]) { | 634 | const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw(); |
| 635 | if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { | ||
| 696 | std::memcpy(&pointer[vaddr], &data, sizeof(T)); | 636 | std::memcpy(&pointer[vaddr], &data, sizeof(T)); |
| 697 | return; | 637 | return; |
| 698 | } | 638 | } |
| 699 | 639 | switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) { | |
| 700 | // Otherwise, we need to grab the page with a lock, in case it is currently being modified | ||
| 701 | const auto entry = SafePageEntry(vaddr >> PAGE_BITS); | ||
| 702 | if (entry.pointer) { | ||
| 703 | // Memory was mapped, we are done | ||
| 704 | std::memcpy(&entry.pointer[vaddr], &data, sizeof(T)); | ||
| 705 | return; | ||
| 706 | } | ||
| 707 | |||
| 708 | switch (entry.attribute) { | ||
| 709 | case Common::PageType::Unmapped: | 640 | case Common::PageType::Unmapped: |
| 710 | LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, | 641 | LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, |
| 711 | static_cast<u32>(data), vaddr); | 642 | static_cast<u32>(data), vaddr); |
| @@ -726,15 +657,13 @@ struct Memory::Impl { | |||
| 726 | 657 | ||
| 727 | template <typename T> | 658 | template <typename T> |
| 728 | bool WriteExclusive(const VAddr vaddr, const T data, const T expected) { | 659 | bool WriteExclusive(const VAddr vaddr, const T data, const T expected) { |
| 729 | u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; | 660 | const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw(); |
| 730 | if (page_pointer != nullptr) { | 661 | if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { |
| 731 | // NOTE: Avoid adding any extra logic to this fast-path block | 662 | // NOTE: Avoid adding any extra logic to this fast-path block |
| 732 | auto* pointer = reinterpret_cast<volatile T*>(&page_pointer[vaddr]); | 663 | const auto volatile_pointer = reinterpret_cast<volatile T*>(&pointer[vaddr]); |
| 733 | return Common::AtomicCompareAndSwap(pointer, data, expected); | 664 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); |
| 734 | } | 665 | } |
| 735 | 666 | switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) { | |
| 736 | const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; | ||
| 737 | switch (type) { | ||
| 738 | case Common::PageType::Unmapped: | 667 | case Common::PageType::Unmapped: |
| 739 | LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, | 668 | LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, |
| 740 | static_cast<u32>(data), vaddr); | 669 | static_cast<u32>(data), vaddr); |
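WriteExclusive resolves the host pointer from the same packed entry and then defers to Common::AtomicCompareAndSwap. That helper is project-specific and not shown in this diff; its contract appears to be an ordinary compare-exchange, roughly like this hedged stand-in:

    #include <atomic>

    // Atomically replace 'word' with 'desired' only if it still holds 'expected'.
    // Returns whether the swap happened. The project helper's exact signature and
    // return convention are assumptions here, not taken from the diff.
    template <typename T>
    bool CompareAndSwap(std::atomic<T>& word, T desired, T expected) {
        return word.compare_exchange_strong(expected, desired);
    }

The atomicity matters because an exclusive guest store must fail if another host thread changed the value underneath it.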
| @@ -755,15 +684,13 @@ struct Memory::Impl { | |||
| 755 | } | 684 | } |
| 756 | 685 | ||
| 757 | bool WriteExclusive128(const VAddr vaddr, const u128 data, const u128 expected) { | 686 | bool WriteExclusive128(const VAddr vaddr, const u128 data, const u128 expected) { |
| 758 | u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; | 687 | const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw(); |
| 759 | if (page_pointer != nullptr) { | 688 | if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { |
| 760 | // NOTE: Avoid adding any extra logic to this fast-path block | 689 | // NOTE: Avoid adding any extra logic to this fast-path block |
| 761 | auto* pointer = reinterpret_cast<volatile u64*>(&page_pointer[vaddr]); | 690 | const auto volatile_pointer = reinterpret_cast<volatile u64*>(&pointer[vaddr]); |
| 762 | return Common::AtomicCompareAndSwap(pointer, data, expected); | 691 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); |
| 763 | } | 692 | } |
| 764 | 693 | switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) { | |
| 765 | const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; | ||
| 766 | switch (type) { | ||
| 767 | case Common::PageType::Unmapped: | 694 | case Common::PageType::Unmapped: |
| 768 | LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}{:016X}", sizeof(data) * 8, | 695 | LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}{:016X}", sizeof(data) * 8, |
| 769 | static_cast<u64>(data[1]), static_cast<u64>(data[0]), vaddr); | 696 | static_cast<u64>(data[1]), static_cast<u64>(data[0]), vaddr); |
| @@ -783,7 +710,6 @@ struct Memory::Impl { | |||
| 783 | return true; | 710 | return true; |
| 784 | } | 711 | } |
| 785 | 712 | ||
| 786 | mutable std::mutex rasterizer_cache_guard; | ||
| 787 | Common::PageTable* current_page_table = nullptr; | 713 | Common::PageTable* current_page_table = nullptr; |
| 788 | Core::System& system; | 714 | Core::System& system; |
| 789 | }; | 715 | }; |
| @@ -799,25 +725,10 @@ void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size | |||
| 799 | impl->MapMemoryRegion(page_table, base, size, target); | 725 | impl->MapMemoryRegion(page_table, base, size, target); |
| 800 | } | 726 | } |
| 801 | 727 | ||
| 802 | void Memory::MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size, | ||
| 803 | Common::MemoryHookPointer mmio_handler) { | ||
| 804 | impl->MapIoRegion(page_table, base, size, std::move(mmio_handler)); | ||
| 805 | } | ||
| 806 | |||
| 807 | void Memory::UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { | 728 | void Memory::UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { |
| 808 | impl->UnmapRegion(page_table, base, size); | 729 | impl->UnmapRegion(page_table, base, size); |
| 809 | } | 730 | } |
| 810 | 731 | ||
| 811 | void Memory::AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size, | ||
| 812 | Common::MemoryHookPointer hook) { | ||
| 813 | impl->AddDebugHook(page_table, base, size, std::move(hook)); | ||
| 814 | } | ||
| 815 | |||
| 816 | void Memory::RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size, | ||
| 817 | Common::MemoryHookPointer hook) { | ||
| 818 | impl->RemoveDebugHook(page_table, base, size, std::move(hook)); | ||
| 819 | } | ||
| 820 | |||
| 821 | bool Memory::IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) const { | 732 | bool Memory::IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) const { |
| 822 | return impl->IsValidVirtualAddress(process, vaddr); | 733 | return impl->IsValidVirtualAddress(process, vaddr); |
| 823 | } | 734 | } |
diff --git a/src/core/memory.h b/src/core/memory.h index 4a1cc63f4..705ebb23d 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | #include <string> | 9 | #include <string> |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/memory_hook.h" | ||
| 12 | 11 | ||
| 13 | namespace Common { | 12 | namespace Common { |
| 14 | struct PageTable; | 13 | struct PageTable; |
| @@ -78,17 +77,6 @@ public: | |||
| 78 | void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target); | 77 | void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target); |
| 79 | 78 | ||
| 80 | /** | 79 | /** |
| 81 | * Maps a region of the emulated process address space as a IO region. | ||
| 82 | * | ||
| 83 | * @param page_table The page table of the emulated process. | ||
| 84 | * @param base The address to start mapping at. Must be page-aligned. | ||
| 85 | * @param size The amount of bytes to map. Must be page-aligned. | ||
| 86 | * @param mmio_handler The handler that backs the mapping. | ||
| 87 | */ | ||
| 88 | void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size, | ||
| 89 | Common::MemoryHookPointer mmio_handler); | ||
| 90 | |||
| 91 | /** | ||
| 92 | * Unmaps a region of the emulated process address space. | 80 | * Unmaps a region of the emulated process address space. |
| 93 | * | 81 | * |
| 94 | * @param page_table The page table of the emulated process. | 82 | * @param page_table The page table of the emulated process. |
| @@ -98,28 +86,6 @@ public: | |||
| 98 | void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size); | 86 | void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size); |
| 99 | 87 | ||
| 100 | /** | 88 | /** |
| 101 | * Adds a memory hook to intercept reads and writes to given region of memory. | ||
| 102 | * | ||
| 103 | * @param page_table The page table of the emulated process | ||
| 104 | * @param base The starting address to apply the hook to. | ||
| 105 | * @param size The size of the memory region to apply the hook to, in bytes. | ||
| 106 | * @param hook The hook to apply to the region of memory. | ||
| 107 | */ | ||
| 108 | void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size, | ||
| 109 | Common::MemoryHookPointer hook); | ||
| 110 | |||
| 111 | /** | ||
| 112 | * Removes a memory hook from a given range of memory. | ||
| 113 | * | ||
| 114 | * @param page_table The page table of the emulated process. | ||
| 115 | * @param base The starting address to remove the hook from. | ||
| 116 | * @param size The size of the memory region to remove the hook from, in bytes. | ||
| 117 | * @param hook The hook to remove from the specified region of memory. | ||
| 118 | */ | ||
| 119 | void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size, | ||
| 120 | Common::MemoryHookPointer hook); | ||
| 121 | |||
| 122 | /** | ||
| 123 | * Checks whether or not the supplied address is a valid virtual | 89 | * Checks whether or not the supplied address is a valid virtual |
| 124 | * address for the given process. | 90 | * address for the given process. |
| 125 | * | 91 | * |
diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 47d9ecf9a..39306509a 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp | |||
| @@ -148,9 +148,4 @@ void RestoreGlobalState(bool is_powered_on) { | |||
| 148 | values.motion_enabled.SetGlobal(true); | 148 | values.motion_enabled.SetGlobal(true); |
| 149 | } | 149 | } |
| 150 | 150 | ||
| 151 | void Sanitize() { | ||
| 152 | values.use_asynchronous_gpu_emulation.SetValue( | ||
| 153 | values.use_asynchronous_gpu_emulation.GetValue() || values.use_multi_core.GetValue()); | ||
| 154 | } | ||
| 155 | |||
| 156 | } // namespace Settings | 151 | } // namespace Settings |
diff --git a/src/core/settings.h b/src/core/settings.h index d5f8d2b7e..a324530bd 100644 --- a/src/core/settings.h +++ b/src/core/settings.h | |||
| @@ -131,6 +131,7 @@ struct Values { | |||
| 131 | 131 | ||
| 132 | bool cpuopt_unsafe_unfuse_fma; | 132 | bool cpuopt_unsafe_unfuse_fma; |
| 133 | bool cpuopt_unsafe_reduce_fp_error; | 133 | bool cpuopt_unsafe_reduce_fp_error; |
| 134 | bool cpuopt_unsafe_inaccurate_nan; | ||
| 134 | 135 | ||
| 135 | // Renderer | 136 | // Renderer |
| 136 | Setting<RendererBackend> renderer_backend; | 137 | Setting<RendererBackend> renderer_backend; |
| @@ -221,7 +222,7 @@ struct Values { | |||
| 221 | bool disable_macro_jit; | 222 | bool disable_macro_jit; |
| 222 | bool extended_logging; | 223 | bool extended_logging; |
| 223 | 224 | ||
| 224 | // Misceallaneous | 225 | // Miscellaneous |
| 225 | std::string log_filter; | 226 | std::string log_filter; |
| 226 | bool use_dev_keys; | 227 | bool use_dev_keys; |
| 227 | 228 | ||
| @@ -257,7 +258,4 @@ void LogSettings(); | |||
| 257 | // Restore the global state of all applicable settings in the Values struct | 258 | // Restore the global state of all applicable settings in the Values struct |
| 258 | void RestoreGlobalState(bool is_powered_on); | 259 | void RestoreGlobalState(bool is_powered_on); |
| 259 | 260 | ||
| 260 | // Fixes settings that are known to cause issues with the emulator | ||
| 261 | void Sanitize(); | ||
| 262 | |||
| 263 | } // namespace Settings | 261 | } // namespace Settings |
diff --git a/src/input_common/gcadapter/gc_adapter.h b/src/input_common/gcadapter/gc_adapter.h index f1256c9da..7a6c545bd 100644 --- a/src/input_common/gcadapter/gc_adapter.h +++ b/src/input_common/gcadapter/gc_adapter.h | |||
| @@ -120,17 +120,17 @@ private: | |||
| 120 | /// For use in initialization, querying devices to find the adapter | 120 | /// For use in initialization, querying devices to find the adapter |
| 121 | void Setup(); | 121 | void Setup(); |
| 122 | 122 | ||
| 123 | /// Resets status of all GC controller devices to a disconected state | 123 | /// Resets status of all GC controller devices to a disconnected state |
| 124 | void ResetDevices(); | 124 | void ResetDevices(); |
| 125 | 125 | ||
| 126 | /// Resets status of device connected to a disconected state | 126 | /// Resets status of device connected to a disconnected state |
| 127 | void ResetDevice(std::size_t port); | 127 | void ResetDevice(std::size_t port); |
| 128 | 128 | ||
| 129 | /// Returns true if we successfully gain access to GC Adapter | 129 | /// Returns true if we successfully gain access to GC Adapter |
| 130 | bool CheckDeviceAccess(); | 130 | bool CheckDeviceAccess(); |
| 131 | 131 | ||
| 132 | /// Captures GC Adapter endpoint address | 132 | /// Captures GC Adapter endpoint address |
| 133 | /// Returns true if the endpoind was set correctly | 133 | /// Returns true if the endpoint was set correctly |
| 134 | bool GetGCEndpoint(libusb_device* device); | 134 | bool GetGCEndpoint(libusb_device* device); |
| 135 | 135 | ||
| 136 | /// For shutting down, clear all data, join all threads, release usb | 136 | /// For shutting down, clear all data, join all threads, release usb |
diff --git a/src/input_common/gcadapter/gc_poller.cpp b/src/input_common/gcadapter/gc_poller.cpp index 4d1052414..9670bdeb2 100644 --- a/src/input_common/gcadapter/gc_poller.cpp +++ b/src/input_common/gcadapter/gc_poller.cpp | |||
| @@ -139,10 +139,10 @@ void GCButtonFactory::EndConfiguration() { | |||
| 139 | 139 | ||
| 140 | class GCAnalog final : public Input::AnalogDevice { | 140 | class GCAnalog final : public Input::AnalogDevice { |
| 141 | public: | 141 | public: |
| 142 | explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_, | 142 | explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, bool invert_x_, bool invert_y_, |
| 143 | const GCAdapter::Adapter* adapter, float range_) | 143 | float deadzone_, float range_, const GCAdapter::Adapter* adapter) |
| 144 | : port(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), gcadapter(adapter), | 144 | : port(port_), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_), invert_y(invert_y_), |
| 145 | range(range_) {} | 145 | deadzone(deadzone_), range(range_), gcadapter(adapter) {} |
| 146 | 146 | ||
| 147 | float GetAxis(u32 axis) const { | 147 | float GetAxis(u32 axis) const { |
| 148 | if (gcadapter->DeviceConnected(port)) { | 148 | if (gcadapter->DeviceConnected(port)) { |
| @@ -157,7 +157,12 @@ public: | |||
| 157 | std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const { | 157 | std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const { |
| 158 | float x = GetAxis(analog_axis_x); | 158 | float x = GetAxis(analog_axis_x); |
| 159 | float y = GetAxis(analog_axis_y); | 159 | float y = GetAxis(analog_axis_y); |
| 160 | 160 | if (invert_x) { | |
| 161 | x = -x; | ||
| 162 | } | ||
| 163 | if (invert_y) { | ||
| 164 | y = -y; | ||
| 165 | } | ||
| 161 | // Make sure the coordinates are in the unit circle, | 166 | // Make sure the coordinates are in the unit circle, |
| 162 | // otherwise normalize it. | 167 | // otherwise normalize it. |
| 163 | float r = x * x + y * y; | 168 | float r = x * x + y * y; |
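For context, the unit-circle comment above refers to the normalization step that follows it (truncated by this hunk): if the squared radius exceeds 1, the vector is scaled back onto the circle. A standalone sketch of that post-inversion clamp, with illustrative names:

    #include <cmath>
    #include <utility>

    // Clamp a raw stick sample to the unit circle; sketch only.
    std::pair<float, float> ClampToUnitCircle(float x, float y) {
        const float r_squared = x * x + y * y;
        if (r_squared > 1.0f) {
            const float r = std::sqrt(r_squared);
            return {x / r, y / r};
        }
        return {x, y};
    }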
| @@ -200,9 +205,11 @@ private: | |||
| 200 | const u32 port; | 205 | const u32 port; |
| 201 | const u32 axis_x; | 206 | const u32 axis_x; |
| 202 | const u32 axis_y; | 207 | const u32 axis_y; |
| 208 | const bool invert_x; | ||
| 209 | const bool invert_y; | ||
| 203 | const float deadzone; | 210 | const float deadzone; |
| 204 | const GCAdapter::Adapter* gcadapter; | ||
| 205 | const float range; | 211 | const float range; |
| 212 | const GCAdapter::Adapter* gcadapter; | ||
| 206 | mutable std::mutex mutex; | 213 | mutable std::mutex mutex; |
| 207 | }; | 214 | }; |
| 208 | 215 | ||
| @@ -223,8 +230,13 @@ std::unique_ptr<Input::AnalogDevice> GCAnalogFactory::Create(const Common::Param | |||
| 223 | const auto axis_y = static_cast<u32>(params.Get("axis_y", 1)); | 230 | const auto axis_y = static_cast<u32>(params.Get("axis_y", 1)); |
| 224 | const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); | 231 | const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); |
| 225 | const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); | 232 | const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); |
| 233 | const std::string invert_x_value = params.Get("invert_x", "+"); | ||
| 234 | const std::string invert_y_value = params.Get("invert_y", "+"); | ||
| 235 | const bool invert_x = invert_x_value == "-"; | ||
| 236 | const bool invert_y = invert_y_value == "-"; | ||
| 226 | 237 | ||
| 227 | return std::make_unique<GCAnalog>(port, axis_x, axis_y, deadzone, adapter.get(), range); | 238 | return std::make_unique<GCAnalog>(port, axis_x, axis_y, invert_x, invert_y, deadzone, range, |
| 239 | adapter.get()); | ||
| 228 | } | 240 | } |
| 229 | 241 | ||
| 230 | void GCAnalogFactory::BeginConfiguration() { | 242 | void GCAnalogFactory::BeginConfiguration() { |
| @@ -282,6 +294,8 @@ Common::ParamPackage GCAnalogFactory::GetNextInput() { | |||
| 282 | params.Set("port", controller_number); | 294 | params.Set("port", controller_number); |
| 283 | params.Set("axis_x", analog_x_axis); | 295 | params.Set("axis_x", analog_x_axis); |
| 284 | params.Set("axis_y", analog_y_axis); | 296 | params.Set("axis_y", analog_y_axis); |
| 297 | params.Set("invert_x", "+"); | ||
| 298 | params.Set("invert_y", "+"); | ||
| 285 | analog_x_axis = -1; | 299 | analog_x_axis = -1; |
| 286 | analog_y_axis = -1; | 300 | analog_y_axis = -1; |
| 287 | controller_number = -1; | 301 | controller_number = -1; |
diff --git a/src/input_common/motion_input.cpp b/src/input_common/motion_input.cpp index f77ba535d..6a65f175e 100644 --- a/src/input_common/motion_input.cpp +++ b/src/input_common/motion_input.cpp | |||
| @@ -129,7 +129,7 @@ void MotionInput::UpdateOrientation(u64 elapsed_time) { | |||
| 129 | rad_gyro += ki * integral_error; | 129 | rad_gyro += ki * integral_error; |
| 130 | rad_gyro += kd * derivative_error; | 130 | rad_gyro += kd * derivative_error; |
| 131 | } else { | 131 | } else { |
| 132 | // Give more weight to acelerometer values to compensate for the lack of gyro | 132 | // Give more weight to accelerometer values to compensate for the lack of gyro |
| 133 | rad_gyro += 35.0f * kp * real_error; | 133 | rad_gyro += 35.0f * kp * real_error; |
| 134 | rad_gyro += 10.0f * ki * integral_error; | 134 | rad_gyro += 10.0f * ki * integral_error; |
| 135 | rad_gyro += 10.0f * kd * derivative_error; | 135 | rad_gyro += 10.0f * kd * derivative_error; |
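The hunk above only reweights the existing PID terms: with a gyro present the correction uses kp/ki/kd as-is, and without one the accelerometer-derived error is trusted far more heavily. Isolated, the step looks like this (gain names and the gyro flag are assumptions for illustration, not the actual implementation):

    // One orientation-correction increment, mirroring the structure of
    // MotionInput::UpdateOrientation.
    float CorrectionStep(float real_error, float integral_error, float derivative_error,
                         float kp, float ki, float kd, bool gyro_available) {
        if (gyro_available) {
            return kp * real_error + ki * integral_error + kd * derivative_error;
        }
        // No gyro: lean heavily on the accelerometer-derived error terms.
        return 35.0f * kp * real_error + 10.0f * ki * integral_error +
               10.0f * kd * derivative_error;
    }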
diff --git a/src/input_common/mouse/mouse_input.h b/src/input_common/mouse/mouse_input.h index 65e64bee7..58803c1bf 100644 --- a/src/input_common/mouse/mouse_input.h +++ b/src/input_common/mouse/mouse_input.h | |||
| @@ -20,7 +20,7 @@ enum class MouseButton { | |||
| 20 | Left, | 20 | Left, |
| 21 | Wheel, | 21 | Wheel, |
| 22 | Right, | 22 | Right, |
| 23 | Foward, | 23 | Forward, |
| 24 | Backward, | 24 | Backward, |
| 25 | Undefined, | 25 | Undefined, |
| 26 | }; | 26 | }; |
diff --git a/src/input_common/mouse/mouse_poller.cpp b/src/input_common/mouse/mouse_poller.cpp index 7445ad3ad..508eb0c7d 100644 --- a/src/input_common/mouse/mouse_poller.cpp +++ b/src/input_common/mouse/mouse_poller.cpp | |||
| @@ -62,10 +62,10 @@ void MouseButtonFactory::EndConfiguration() { | |||
| 62 | 62 | ||
| 63 | class MouseAnalog final : public Input::AnalogDevice { | 63 | class MouseAnalog final : public Input::AnalogDevice { |
| 64 | public: | 64 | public: |
| 65 | explicit MouseAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_, float range_, | 65 | explicit MouseAnalog(u32 port_, u32 axis_x_, u32 axis_y_, bool invert_x_, bool invert_y_, |
| 66 | const MouseInput::Mouse* mouse_input_) | 66 | float deadzone_, float range_, const MouseInput::Mouse* mouse_input_) |
| 67 | : button(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), range(range_), | 67 | : button(port_), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_), invert_y(invert_y_), |
| 68 | mouse_input(mouse_input_) {} | 68 | deadzone(deadzone_), range(range_), mouse_input(mouse_input_) {} |
| 69 | 69 | ||
| 70 | float GetAxis(u32 axis) const { | 70 | float GetAxis(u32 axis) const { |
| 71 | std::lock_guard lock{mutex}; | 71 | std::lock_guard lock{mutex}; |
| @@ -77,6 +77,12 @@ public: | |||
| 77 | std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const { | 77 | std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const { |
| 78 | float x = GetAxis(analog_axis_x); | 78 | float x = GetAxis(analog_axis_x); |
| 79 | float y = GetAxis(analog_axis_y); | 79 | float y = GetAxis(analog_axis_y); |
| 80 | if (invert_x) { | ||
| 81 | x = -x; | ||
| 82 | } | ||
| 83 | if (invert_y) { | ||
| 84 | y = -y; | ||
| 85 | } | ||
| 80 | 86 | ||
| 81 | // Make sure the coordinates are in the unit circle, | 87 | // Make sure the coordinates are in the unit circle, |
| 82 | // otherwise normalize it. | 88 | // otherwise normalize it. |
| @@ -104,6 +110,8 @@ private: | |||
| 104 | const u32 button; | 110 | const u32 button; |
| 105 | const u32 axis_x; | 111 | const u32 axis_x; |
| 106 | const u32 axis_y; | 112 | const u32 axis_y; |
| 113 | const bool invert_x; | ||
| 114 | const bool invert_y; | ||
| 107 | const float deadzone; | 115 | const float deadzone; |
| 108 | const float range; | 116 | const float range; |
| 109 | const MouseInput::Mouse* mouse_input; | 117 | const MouseInput::Mouse* mouse_input; |
| @@ -128,8 +136,13 @@ std::unique_ptr<Input::AnalogDevice> MouseAnalogFactory::Create( | |||
| 128 | const auto axis_y = static_cast<u32>(params.Get("axis_y", 1)); | 136 | const auto axis_y = static_cast<u32>(params.Get("axis_y", 1)); |
| 129 | const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); | 137 | const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); |
| 130 | const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); | 138 | const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); |
| 139 | const std::string invert_x_value = params.Get("invert_x", "+"); | ||
| 140 | const std::string invert_y_value = params.Get("invert_y", "+"); | ||
| 141 | const bool invert_x = invert_x_value == "-"; | ||
| 142 | const bool invert_y = invert_y_value == "-"; | ||
| 131 | 143 | ||
| 132 | return std::make_unique<MouseAnalog>(port, axis_x, axis_y, deadzone, range, mouse_input.get()); | 144 | return std::make_unique<MouseAnalog>(port, axis_x, axis_y, invert_x, invert_y, deadzone, range, |
| 145 | mouse_input.get()); | ||
| 133 | } | 146 | } |
| 134 | 147 | ||
| 135 | void MouseAnalogFactory::BeginConfiguration() { | 148 | void MouseAnalogFactory::BeginConfiguration() { |
| @@ -153,6 +166,8 @@ Common::ParamPackage MouseAnalogFactory::GetNextInput() const { | |||
| 153 | params.Set("port", static_cast<u16>(pad.button)); | 166 | params.Set("port", static_cast<u16>(pad.button)); |
| 154 | params.Set("axis_x", 0); | 167 | params.Set("axis_x", 0); |
| 155 | params.Set("axis_y", 1); | 168 | params.Set("axis_y", 1); |
| 169 | params.Set("invert_x", "+"); | ||
| 170 | params.Set("invert_y", "+"); | ||
| 156 | return params; | 171 | return params; |
| 157 | } | 172 | } |
| 158 | } | 173 | } |
diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp index 7827e324c..d32eb732a 100644 --- a/src/input_common/sdl/sdl_impl.cpp +++ b/src/input_common/sdl/sdl_impl.cpp | |||
| @@ -352,13 +352,20 @@ private: | |||
| 352 | class SDLAnalog final : public Input::AnalogDevice { | 352 | class SDLAnalog final : public Input::AnalogDevice { |
| 353 | public: | 353 | public: |
| 354 | explicit SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_, | 354 | explicit SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_, |
| 355 | float deadzone_, float range_) | 355 | bool invert_x_, bool invert_y_, float deadzone_, float range_) |
| 356 | : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), | 356 | : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_), |
| 357 | range(range_) {} | 357 | invert_y(invert_y_), deadzone(deadzone_), range(range_) {} |
| 358 | 358 | ||
| 359 | std::tuple<float, float> GetStatus() const override { | 359 | std::tuple<float, float> GetStatus() const override { |
| 360 | const auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range); | 360 | auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range); |
| 361 | const float r = std::sqrt((x * x) + (y * y)); | 361 | const float r = std::sqrt((x * x) + (y * y)); |
| 362 | if (invert_x) { | ||
| 363 | x = -x; | ||
| 364 | } | ||
| 365 | if (invert_y) { | ||
| 366 | y = -y; | ||
| 367 | } | ||
| 368 | |||
| 362 | if (r > deadzone) { | 369 | if (r > deadzone) { |
| 363 | return std::make_tuple(x / r * (r - deadzone) / (1 - deadzone), | 370 | return std::make_tuple(x / r * (r - deadzone) / (1 - deadzone), |
| 364 | y / r * (r - deadzone) / (1 - deadzone)); | 371 | y / r * (r - deadzone) / (1 - deadzone)); |
| @@ -386,6 +393,8 @@ private: | |||
| 386 | std::shared_ptr<SDLJoystick> joystick; | 393 | std::shared_ptr<SDLJoystick> joystick; |
| 387 | const int axis_x; | 394 | const int axis_x; |
| 388 | const int axis_y; | 395 | const int axis_y; |
| 396 | const bool invert_x; | ||
| 397 | const bool invert_y; | ||
| 389 | const float deadzone; | 398 | const float deadzone; |
| 390 | const float range; | 399 | const float range; |
| 391 | }; | 400 | }; |
| @@ -572,12 +581,17 @@ public: | |||
| 572 | const int axis_y = params.Get("axis_y", 1); | 581 | const int axis_y = params.Get("axis_y", 1); |
| 573 | const float deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); | 582 | const float deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); |
| 574 | const float range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); | 583 | const float range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); |
| 584 | const std::string invert_x_value = params.Get("invert_x", "+"); | ||
| 585 | const std::string invert_y_value = params.Get("invert_y", "+"); | ||
| 586 | const bool invert_x = invert_x_value == "-"; | ||
| 587 | const bool invert_y = invert_y_value == "-"; | ||
| 575 | auto joystick = state.GetSDLJoystickByGUID(guid, port); | 588 | auto joystick = state.GetSDLJoystickByGUID(guid, port); |
| 576 | 589 | ||
| 577 | // This is necessary so accessing GetAxis with axis_x and axis_y won't crash | 590 | // This is necessary so accessing GetAxis with axis_x and axis_y won't crash |
| 578 | joystick->SetAxis(axis_x, 0); | 591 | joystick->SetAxis(axis_x, 0); |
| 579 | joystick->SetAxis(axis_y, 0); | 592 | joystick->SetAxis(axis_y, 0); |
| 580 | return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, deadzone, range); | 593 | return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, invert_x, invert_y, deadzone, |
| 594 | range); | ||
| 581 | } | 595 | } |
| 582 | 596 | ||
| 583 | private: | 597 | private: |
| @@ -886,6 +900,8 @@ Common::ParamPackage BuildParamPackageForAnalog(int port, const std::string& gui | |||
| 886 | params.Set("guid", guid); | 900 | params.Set("guid", guid); |
| 887 | params.Set("axis_x", axis_x); | 901 | params.Set("axis_x", axis_x); |
| 888 | params.Set("axis_y", axis_y); | 902 | params.Set("axis_y", axis_y); |
| 903 | params.Set("invert_x", "+"); | ||
| 904 | params.Set("invert_y", "+"); | ||
| 889 | return params; | 905 | return params; |
| 890 | } | 906 | } |
| 891 | } // Anonymous namespace | 907 | } // Anonymous namespace |
| @@ -1014,11 +1030,44 @@ public: | |||
| 1014 | } | 1030 | } |
| 1015 | return {}; | 1031 | return {}; |
| 1016 | } | 1032 | } |
| 1017 | [[nodiscard]] std::optional<Common::ParamPackage> FromEvent(const SDL_Event& event) const { | 1033 | [[nodiscard]] std::optional<Common::ParamPackage> FromEvent(SDL_Event& event) { |
| 1018 | switch (event.type) { | 1034 | switch (event.type) { |
| 1019 | case SDL_JOYAXISMOTION: | 1035 | case SDL_JOYAXISMOTION: |
| 1020 | if (std::abs(event.jaxis.value / 32767.0) < 0.5) { | 1036 | if (!axis_memory.count(event.jaxis.which) || |
| 1037 | !axis_memory[event.jaxis.which].count(event.jaxis.axis)) { | ||
| 1038 | axis_memory[event.jaxis.which][event.jaxis.axis] = event.jaxis.value; | ||
| 1039 | axis_event_count[event.jaxis.which][event.jaxis.axis] = 1; | ||
| 1021 | break; | 1040 | break; |
| 1041 | } else { | ||
| 1042 | axis_event_count[event.jaxis.which][event.jaxis.axis]++; | ||
| 1043 | // The joystick and axis exist in our map if we take this branch, so no checks | ||
| 1044 | // needed | ||
| 1045 | if (std::abs( | ||
| 1046 | (event.jaxis.value - axis_memory[event.jaxis.which][event.jaxis.axis]) / | ||
| 1047 | 32767.0) < 0.5) { | ||
| 1048 | break; | ||
| 1049 | } else { | ||
| 1050 | if (axis_event_count[event.jaxis.which][event.jaxis.axis] == 2 && | ||
| 1051 | IsAxisAtPole(event.jaxis.value) && | ||
| 1052 | IsAxisAtPole(axis_memory[event.jaxis.which][event.jaxis.axis])) { | ||
| 1053 | // If we have exactly two events and both are near a pole, this is | ||
| 1054 | // likely a digital input masquerading as an analog axis; Instead of | ||
| 1055 | // trying to look at the direction the axis travelled, assume the first | ||
| 1056 | // event was press and the second was release; This should handle most | ||
| 1057 | // digital axes while deferring to the direction of travel for analog | ||
| 1058 | // axes | ||
| 1059 | event.jaxis.value = static_cast<Sint16>( | ||
| 1060 | std::copysign(32767, axis_memory[event.jaxis.which][event.jaxis.axis])); | ||
| 1061 | } else { | ||
| 1062 | // There are more than two events, so this is likely a true analog axis, | ||
| 1063 | // check the direction it travelled | ||
| 1064 | event.jaxis.value = static_cast<Sint16>(std::copysign( | ||
| 1065 | 32767, | ||
| 1066 | event.jaxis.value - axis_memory[event.jaxis.which][event.jaxis.axis])); | ||
| 1067 | } | ||
| 1068 | axis_memory.clear(); | ||
| 1069 | axis_event_count.clear(); | ||
| 1070 | } | ||
| 1022 | } | 1071 | } |
| 1023 | [[fallthrough]]; | 1072 | [[fallthrough]]; |
| 1024 | case SDL_JOYBUTTONUP: | 1073 | case SDL_JOYBUTTONUP: |
| @@ -1027,6 +1076,16 @@ public: | |||
| 1027 | } | 1076 | } |
| 1028 | return std::nullopt; | 1077 | return std::nullopt; |
| 1029 | } | 1078 | } |
| 1079 | |||
| 1080 | private: | ||
| 1081 | // Determine whether an axis value is close to an extreme or center | ||
| 1082 | // Some controllers have a digital D-Pad as a pair of analog sticks, with 3 possible values per | ||
| 1083 | // axis, which is why the center must be considered a pole | ||
| 1084 | bool IsAxisAtPole(int16_t value) const { | ||
| 1085 | return std::abs(value) >= 32767 || std::abs(value) < 327; | ||
| 1086 | } | ||
| 1087 | std::unordered_map<SDL_JoystickID, std::unordered_map<uint8_t, int16_t>> axis_memory; | ||
| 1088 | std::unordered_map<SDL_JoystickID, std::unordered_map<uint8_t, uint32_t>> axis_event_count; | ||
| 1030 | }; | 1089 | }; |
| 1031 | 1090 | ||
| 1032 | class SDLMotionPoller final : public SDLPoller { | 1091 | class SDLMotionPoller final : public SDLPoller { |
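The two-event heuristic above hinges on IsAxisAtPole: a digital D-Pad exposed as an analog axis only ever reports an extreme or (roughly) center, so exactly two pole-valued events mean press/release rather than genuine travel. The thresholds in isolation, widening before std::abs so the check stays safe at -32768:

    #include <cstdint>
    #include <cstdlib>

    // True when a sample sits at full deflection or within ~1% of center.
    bool IsAxisAtPole(std::int16_t value) {
        const int v = std::abs(static_cast<int>(value));
        return v >= 32767 || v < 327;
    }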
diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp index 17a9225d7..412d57896 100644 --- a/src/input_common/udp/client.cpp +++ b/src/input_common/udp/client.cpp | |||
| @@ -225,6 +225,11 @@ void Client::OnPortInfo([[maybe_unused]] Response::PortInfo data) { | |||
| 225 | } | 225 | } |
| 226 | 226 | ||
| 227 | void Client::OnPadData(Response::PadData data, std::size_t client) { | 227 | void Client::OnPadData(Response::PadData data, std::size_t client) { |
| 228 | // Accept packets only for the correct pad | ||
| 229 | if (static_cast<u8>(clients[client].pad_index) != data.info.id) { | ||
| 230 | return; | ||
| 231 | } | ||
| 232 | |||
| 228 | LOG_TRACE(Input, "PadData packet received"); | 233 | LOG_TRACE(Input, "PadData packet received"); |
| 229 | if (data.packet_counter == clients[client].packet_sequence) { | 234 | if (data.packet_counter == clients[client].packet_sequence) { |
| 230 | LOG_WARNING( | 235 | LOG_WARNING( |
diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp index 8686a059c..c5da27a38 100644 --- a/src/input_common/udp/udp.cpp +++ b/src/input_common/udp/udp.cpp | |||
| @@ -28,14 +28,14 @@ private: | |||
| 28 | mutable std::mutex mutex; | 28 | mutable std::mutex mutex; |
| 29 | }; | 29 | }; |
| 30 | 30 | ||
| 31 | /// A motion device factory that creates motion devices from JC Adapter | 31 | /// A motion device factory that creates motion devices from a UDP client |
| 32 | UDPMotionFactory::UDPMotionFactory(std::shared_ptr<CemuhookUDP::Client> client_) | 32 | UDPMotionFactory::UDPMotionFactory(std::shared_ptr<CemuhookUDP::Client> client_) |
| 33 | : client(std::move(client_)) {} | 33 | : client(std::move(client_)) {} |
| 34 | 34 | ||
| 35 | /** | 35 | /** |
| 36 | * Creates motion device | 36 | * Creates motion device |
| 37 | * @param params contains parameters for creating the device: | 37 | * @param params contains parameters for creating the device: |
| 38 | * - "port": the nth jcpad on the adapter | 38 | * - "port": the UDP port number |
| 39 | */ | 39 | */ |
| 40 | std::unique_ptr<Input::MotionDevice> UDPMotionFactory::Create(const Common::ParamPackage& params) { | 40 | std::unique_ptr<Input::MotionDevice> UDPMotionFactory::Create(const Common::ParamPackage& params) { |
| 41 | auto ip = params.Get("ip", "127.0.0.1"); | 41 | auto ip = params.Get("ip", "127.0.0.1"); |
| @@ -90,14 +90,14 @@ private: | |||
| 90 | mutable std::mutex mutex; | 90 | mutable std::mutex mutex; |
| 91 | }; | 91 | }; |
| 92 | 92 | ||
| 93 | /// A motion device factory that creates motion devices from JC Adapter | 93 | /// A motion device factory that creates motion devices from a UDP client |
| 94 | UDPTouchFactory::UDPTouchFactory(std::shared_ptr<CemuhookUDP::Client> client_) | 94 | UDPTouchFactory::UDPTouchFactory(std::shared_ptr<CemuhookUDP::Client> client_) |
| 95 | : client(std::move(client_)) {} | 95 | : client(std::move(client_)) {} |
| 96 | 96 | ||
| 97 | /** | 97 | /** |
| 98 | * Creates motion device | 98 | * Creates motion device |
| 99 | * @param params contains parameters for creating the device: | 99 | * @param params contains parameters for creating the device: |
| 100 | * - "port": the nth jcpad on the adapter | 100 | * - "port": the UDP port number |
| 101 | */ | 101 | */ |
| 102 | std::unique_ptr<Input::TouchDevice> UDPTouchFactory::Create(const Common::ParamPackage& params) { | 102 | std::unique_ptr<Input::TouchDevice> UDPTouchFactory::Create(const Common::ParamPackage& params) { |
| 103 | auto ip = params.Get("ip", "127.0.0.1"); | 103 | auto ip = params.Get("ip", "127.0.0.1"); |
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index d80b0b688..8a606b448 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt | |||
| @@ -4,8 +4,6 @@ add_executable(tests | |||
| 4 | common/fibers.cpp | 4 | common/fibers.cpp |
| 5 | common/param_package.cpp | 5 | common/param_package.cpp |
| 6 | common/ring_buffer.cpp | 6 | common/ring_buffer.cpp |
| 7 | core/arm/arm_test_common.cpp | ||
| 8 | core/arm/arm_test_common.h | ||
| 9 | core/core_timing.cpp | 7 | core/core_timing.cpp |
| 10 | tests.cpp | 8 | tests.cpp |
| 11 | ) | 9 | ) |
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp index 4757dd2b4..d94492fc6 100644 --- a/src/tests/common/fibers.cpp +++ b/src/tests/common/fibers.cpp | |||
| @@ -207,7 +207,7 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) { | |||
| 207 | } | 207 | } |
| 208 | 208 | ||
| 209 | /** This test checks the fiber thread exchange configuration and validates | 209 | /** This test checks the fiber thread exchange configuration and validates |
| 210 | * that a fiber has been succesfully transfered from one thread to another and that the TLS | 210 | * that a fiber has been successfully transferred from one thread to another and that the TLS |
| 211 | * region of the thread is kept while changing fibers. | 211 | * region of the thread is kept while changing fibers. |
| 212 | */ | 212 | */ |
| 213 | TEST_CASE("Fibers::InterExchange", "[common]") { | 213 | TEST_CASE("Fibers::InterExchange", "[common]") { |
| @@ -299,7 +299,7 @@ static void ThreadStart3(u32 id, TestControl3& test_control) { | |||
| 299 | } | 299 | } |
| 300 | 300 | ||
| 301 | /** This test checks for two threads racing to start the same fiber. | 301 | /** This test checks for two threads racing to start the same fiber. |
| 302 | * It checks that execution occured in an ordered manner and that at no time | 302 | * It checks that execution occurred in an ordered manner and that at no time |
| 303 | * were two contexts active at the same time. | 303 | * were two contexts active at the same time. |
| 304 | */ | 304 | */ |
| 305 | TEST_CASE("Fibers::StartRace", "[common]") { | 305 | TEST_CASE("Fibers::StartRace", "[common]") { |
diff --git a/src/tests/common/ring_buffer.cpp b/src/tests/common/ring_buffer.cpp index c883c4d56..54def22da 100644 --- a/src/tests/common/ring_buffer.cpp +++ b/src/tests/common/ring_buffer.cpp | |||
| @@ -20,60 +20,60 @@ TEST_CASE("RingBuffer: Basic Tests", "[common]") { | |||
| 20 | for (std::size_t i = 0; i < 4; i++) { | 20 | for (std::size_t i = 0; i < 4; i++) { |
| 21 | const char elem = static_cast<char>(i); | 21 | const char elem = static_cast<char>(i); |
| 22 | const std::size_t count = buf.Push(&elem, 1); | 22 | const std::size_t count = buf.Push(&elem, 1); |
| 23 | REQUIRE(count == 1); | 23 | REQUIRE(count == 1U); |
| 24 | } | 24 | } |
| 25 | 25 | ||
| 26 | REQUIRE(buf.Size() == 4); | 26 | REQUIRE(buf.Size() == 4U); |
| 27 | 27 | ||
| 28 | // Pushing values into a full ring buffer should fail. | 28 | // Pushing values into a full ring buffer should fail. |
| 29 | { | 29 | { |
| 30 | const char elem = static_cast<char>(42); | 30 | const char elem = static_cast<char>(42); |
| 31 | const std::size_t count = buf.Push(&elem, 1); | 31 | const std::size_t count = buf.Push(&elem, 1); |
| 32 | REQUIRE(count == 0); | 32 | REQUIRE(count == 0U); |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | REQUIRE(buf.Size() == 4); | 35 | REQUIRE(buf.Size() == 4U); |
| 36 | 36 | ||
| 37 | // Popping multiple values from a ring buffer with values should succeed. | 37 | // Popping multiple values from a ring buffer with values should succeed. |
| 38 | { | 38 | { |
| 39 | const std::vector<char> popped = buf.Pop(2); | 39 | const std::vector<char> popped = buf.Pop(2); |
| 40 | REQUIRE(popped.size() == 2); | 40 | REQUIRE(popped.size() == 2U); |
| 41 | REQUIRE(popped[0] == 0); | 41 | REQUIRE(popped[0] == 0); |
| 42 | REQUIRE(popped[1] == 1); | 42 | REQUIRE(popped[1] == 1); |
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | REQUIRE(buf.Size() == 2); | 45 | REQUIRE(buf.Size() == 2U); |
| 46 | 46 | ||
| 47 | // Popping a single value from a ring buffer with values should succeed. | 47 | // Popping a single value from a ring buffer with values should succeed. |
| 48 | { | 48 | { |
| 49 | const std::vector<char> popped = buf.Pop(1); | 49 | const std::vector<char> popped = buf.Pop(1); |
| 50 | REQUIRE(popped.size() == 1); | 50 | REQUIRE(popped.size() == 1U); |
| 51 | REQUIRE(popped[0] == 2); | 51 | REQUIRE(popped[0] == 2); |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | REQUIRE(buf.Size() == 1); | 54 | REQUIRE(buf.Size() == 1U); |
| 55 | 55 | ||
| 56 | // Pushing more values than space available should partially succeed. | 56 | // Pushing more values than space available should partially succeed. |
| 57 | { | 57 | { |
| 58 | std::vector<char> to_push(6); | 58 | std::vector<char> to_push(6); |
| 59 | std::iota(to_push.begin(), to_push.end(), 88); | 59 | std::iota(to_push.begin(), to_push.end(), 88); |
| 60 | const std::size_t count = buf.Push(to_push); | 60 | const std::size_t count = buf.Push(to_push); |
| 61 | REQUIRE(count == 3); | 61 | REQUIRE(count == 3U); |
| 62 | } | 62 | } |
| 63 | 63 | ||
| 64 | REQUIRE(buf.Size() == 4); | 64 | REQUIRE(buf.Size() == 4U); |
| 65 | 65 | ||
| 66 | // Doing an unlimited pop should pop all values. | 66 | // Doing an unlimited pop should pop all values. |
| 67 | { | 67 | { |
| 68 | const std::vector<char> popped = buf.Pop(); | 68 | const std::vector<char> popped = buf.Pop(); |
| 69 | REQUIRE(popped.size() == 4); | 69 | REQUIRE(popped.size() == 4U); |
| 70 | REQUIRE(popped[0] == 3); | 70 | REQUIRE(popped[0] == 3); |
| 71 | REQUIRE(popped[1] == 88); | 71 | REQUIRE(popped[1] == 88); |
| 72 | REQUIRE(popped[2] == 89); | 72 | REQUIRE(popped[2] == 89); |
| 73 | REQUIRE(popped[3] == 90); | 73 | REQUIRE(popped[3] == 90); |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | REQUIRE(buf.Size() == 0); | 76 | REQUIRE(buf.Size() == 0U); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | TEST_CASE("RingBuffer: Threaded Test", "[common]") { | 79 | TEST_CASE("RingBuffer: Threaded Test", "[common]") { |
| @@ -93,7 +93,7 @@ TEST_CASE("RingBuffer: Threaded Test", "[common]") { | |||
| 93 | std::size_t i = 0; | 93 | std::size_t i = 0; |
| 94 | while (i < count) { | 94 | while (i < count) { |
| 95 | if (const std::size_t c = buf.Push(&value[0], 1); c > 0) { | 95 | if (const std::size_t c = buf.Push(&value[0], 1); c > 0) { |
| 96 | REQUIRE(c == 1); | 96 | REQUIRE(c == 1U); |
| 97 | i++; | 97 | i++; |
| 98 | next_value(value); | 98 | next_value(value); |
| 99 | } else { | 99 | } else { |
| @@ -108,7 +108,7 @@ TEST_CASE("RingBuffer: Threaded Test", "[common]") { | |||
| 108 | std::size_t i = 0; | 108 | std::size_t i = 0; |
| 109 | while (i < count) { | 109 | while (i < count) { |
| 110 | if (const std::vector<char> v = buf.Pop(1); v.size() > 0) { | 110 | if (const std::vector<char> v = buf.Pop(1); v.size() > 0) { |
| 111 | REQUIRE(v.size() == 2); | 111 | REQUIRE(v.size() == 2U); |
| 112 | REQUIRE(v[0] == value[0]); | 112 | REQUIRE(v[0] == value[0]); |
| 113 | REQUIRE(v[1] == value[1]); | 113 | REQUIRE(v[1] == value[1]); |
| 114 | i++; | 114 | i++; |
| @@ -123,7 +123,7 @@ TEST_CASE("RingBuffer: Threaded Test", "[common]") { | |||
| 123 | producer.join(); | 123 | producer.join(); |
| 124 | consumer.join(); | 124 | consumer.join(); |
| 125 | 125 | ||
| 126 | REQUIRE(buf.Size() == 0); | 126 | REQUIRE(buf.Size() == 0U); |
| 127 | printf("RingBuffer: Threaded Test: full: %zu, empty: %zu\n", full, empty); | 127 | printf("RingBuffer: Threaded Test: full: %zu, empty: %zu\n", full, empty); |
| 128 | } | 128 | } |
| 129 | 129 | ||
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp deleted file mode 100644 index e54674d11..000000000 --- a/src/tests/core/arm/arm_test_common.cpp +++ /dev/null | |||
| @@ -1,145 +0,0 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | |||
| 7 | #include "common/page_table.h" | ||
| 8 | #include "core/core.h" | ||
| 9 | #include "core/hle/kernel/memory/page_table.h" | ||
| 10 | #include "core/hle/kernel/process.h" | ||
| 11 | #include "core/memory.h" | ||
| 12 | #include "tests/core/arm/arm_test_common.h" | ||
| 13 | |||
| 14 | namespace ArmTests { | ||
| 15 | |||
| 16 | TestEnvironment::TestEnvironment(bool mutable_memory_) | ||
| 17 | : mutable_memory(mutable_memory_), | ||
| 18 | test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} { | ||
| 19 | auto& system = Core::System::GetInstance(); | ||
| 20 | |||
| 21 | auto process = Kernel::Process::Create(system, "", Kernel::Process::ProcessType::Userland); | ||
| 22 | page_table = &process->PageTable().PageTableImpl(); | ||
| 23 | |||
| 24 | system.Memory().MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); | ||
| 25 | system.Memory().MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); | ||
| 26 | |||
| 27 | kernel.MakeCurrentProcess(process.get()); | ||
| 28 | } | ||
| 29 | |||
| 30 | TestEnvironment::~TestEnvironment() { | ||
| 31 | auto& system = Core::System::GetInstance(); | ||
| 32 | system.Memory().UnmapRegion(*page_table, 0x80000000, 0x80000000); | ||
| 33 | system.Memory().UnmapRegion(*page_table, 0x00000000, 0x80000000); | ||
| 34 | } | ||
| 35 | |||
| 36 | void TestEnvironment::SetMemory64(VAddr vaddr, u64 value) { | ||
| 37 | SetMemory32(vaddr + 0, static_cast<u32>(value)); | ||
| 38 | SetMemory32(vaddr + 4, static_cast<u32>(value >> 32)); | ||
| 39 | } | ||
| 40 | |||
| 41 | void TestEnvironment::SetMemory32(VAddr vaddr, u32 value) { | ||
| 42 | SetMemory16(vaddr + 0, static_cast<u16>(value)); | ||
| 43 | SetMemory16(vaddr + 2, static_cast<u16>(value >> 16)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TestEnvironment::SetMemory16(VAddr vaddr, u16 value) { | ||
| 47 | SetMemory8(vaddr + 0, static_cast<u8>(value)); | ||
| 48 | SetMemory8(vaddr + 1, static_cast<u8>(value >> 8)); | ||
| 49 | } | ||
| 50 | |||
| 51 | void TestEnvironment::SetMemory8(VAddr vaddr, u8 value) { | ||
| 52 | test_memory->data[vaddr] = value; | ||
| 53 | } | ||
| 54 | |||
| 55 | std::vector<WriteRecord> TestEnvironment::GetWriteRecords() const { | ||
| 56 | return write_records; | ||
| 57 | } | ||
| 58 | |||
| 59 | void TestEnvironment::ClearWriteRecords() { | ||
| 60 | write_records.clear(); | ||
| 61 | } | ||
| 62 | |||
| 63 | TestEnvironment::TestMemory::~TestMemory() {} | ||
| 64 | |||
| 65 | std::optional<bool> TestEnvironment::TestMemory::IsValidAddress(VAddr addr) { | ||
| 66 | return true; | ||
| 67 | } | ||
| 68 | |||
| 69 | std::optional<u8> TestEnvironment::TestMemory::Read8(VAddr addr) { | ||
| 70 | const auto iter = data.find(addr); | ||
| 71 | |||
| 72 | if (iter == data.end()) { | ||
| 73 | // Some arbitrary data | ||
| 74 | return static_cast<u8>(addr); | ||
| 75 | } | ||
| 76 | |||
| 77 | return iter->second; | ||
| 78 | } | ||
| 79 | |||
| 80 | std::optional<u16> TestEnvironment::TestMemory::Read16(VAddr addr) { | ||
| 81 | return *Read8(addr) | static_cast<u16>(*Read8(addr + 1)) << 8; | ||
| 82 | } | ||
| 83 | |||
| 84 | std::optional<u32> TestEnvironment::TestMemory::Read32(VAddr addr) { | ||
| 85 | return *Read16(addr) | static_cast<u32>(*Read16(addr + 2)) << 16; | ||
| 86 | } | ||
| 87 | |||
| 88 | std::optional<u64> TestEnvironment::TestMemory::Read64(VAddr addr) { | ||
| 89 | return *Read32(addr) | static_cast<u64>(*Read32(addr + 4)) << 32; | ||
| 90 | } | ||
| 91 | |||
| 92 | bool TestEnvironment::TestMemory::ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) { | ||
| 93 | VAddr addr = src_addr; | ||
| 94 | u8* data = static_cast<u8*>(dest_buffer); | ||
| 95 | |||
| 96 | for (std::size_t i = 0; i < size; i++, addr++, data++) { | ||
| 97 | *data = *Read8(addr); | ||
| 98 | } | ||
| 99 | |||
| 100 | return true; | ||
| 101 | } | ||
| 102 | |||
| 103 | bool TestEnvironment::TestMemory::Write8(VAddr addr, u8 data) { | ||
| 104 | env->write_records.emplace_back(8, addr, data); | ||
| 105 | if (env->mutable_memory) | ||
| 106 | env->SetMemory8(addr, data); | ||
| 107 | return true; | ||
| 108 | } | ||
| 109 | |||
| 110 | bool TestEnvironment::TestMemory::Write16(VAddr addr, u16 data) { | ||
| 111 | env->write_records.emplace_back(16, addr, data); | ||
| 112 | if (env->mutable_memory) | ||
| 113 | env->SetMemory16(addr, data); | ||
| 114 | return true; | ||
| 115 | } | ||
| 116 | |||
| 117 | bool TestEnvironment::TestMemory::Write32(VAddr addr, u32 data) { | ||
| 118 | env->write_records.emplace_back(32, addr, data); | ||
| 119 | if (env->mutable_memory) | ||
| 120 | env->SetMemory32(addr, data); | ||
| 121 | return true; | ||
| 122 | } | ||
| 123 | |||
| 124 | bool TestEnvironment::TestMemory::Write64(VAddr addr, u64 data) { | ||
| 125 | env->write_records.emplace_back(64, addr, data); | ||
| 126 | if (env->mutable_memory) | ||
| 127 | env->SetMemory64(addr, data); | ||
| 128 | return true; | ||
| 129 | } | ||
| 130 | |||
| 131 | bool TestEnvironment::TestMemory::WriteBlock(VAddr dest_addr, const void* src_buffer, | ||
| 132 | std::size_t size) { | ||
| 133 | VAddr addr = dest_addr; | ||
| 134 | const u8* data = static_cast<const u8*>(src_buffer); | ||
| 135 | |||
| 136 | for (std::size_t i = 0; i < size; i++, addr++, data++) { | ||
| 137 | env->write_records.emplace_back(8, addr, *data); | ||
| 138 | if (env->mutable_memory) | ||
| 139 | env->SetMemory8(addr, *data); | ||
| 140 | } | ||
| 141 | |||
| 142 | return true; | ||
| 143 | } | ||
| 144 | |||
| 145 | } // namespace ArmTests | ||
diff --git a/src/tests/core/arm/arm_test_common.h b/src/tests/core/arm/arm_test_common.h deleted file mode 100644 index d145dbfcc..000000000 --- a/src/tests/core/arm/arm_test_common.h +++ /dev/null | |||
| @@ -1,93 +0,0 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <tuple> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/memory_hook.h" | ||
| 13 | #include "core/hle/kernel/kernel.h" | ||
| 14 | |||
| 15 | namespace Common { | ||
| 16 | struct PageTable; | ||
| 17 | } | ||
| 18 | |||
| 19 | namespace ArmTests { | ||
| 20 | |||
| 21 | struct WriteRecord { | ||
| 22 | WriteRecord(std::size_t size, VAddr addr, u64 data) : size(size), addr(addr), data(data) {} | ||
| 23 | std::size_t size; | ||
| 24 | VAddr addr; | ||
| 25 | u64 data; | ||
| 26 | bool operator==(const WriteRecord& o) const { | ||
| 27 | return std::tie(size, addr, data) == std::tie(o.size, o.addr, o.data); | ||
| 28 | } | ||
| 29 | }; | ||
| 30 | |||
| 31 | class TestEnvironment final { | ||
| 32 | public: | ||
| 33 | /* | ||
| 34 | * Initialise test environment | ||
| 35 | * @param mutable_memory If false, writes to memory can never be read back. | ||
| 36 | * (Memory is immutable.) | ||
| 37 | */ | ||
| 38 | explicit TestEnvironment(bool mutable_memory = false); | ||
| 39 | |||
| 40 | /// Shutdown test environment | ||
| 41 | ~TestEnvironment(); | ||
| 42 | |||
| 43 | /// Sets value at memory location vaddr. | ||
| 44 | void SetMemory8(VAddr vaddr, u8 value); | ||
| 45 | void SetMemory16(VAddr vaddr, u16 value); | ||
| 46 | void SetMemory32(VAddr vaddr, u32 value); | ||
| 47 | void SetMemory64(VAddr vaddr, u64 value); | ||
| 48 | |||
| 49 | /** | ||
| 50 | * Whenever Memory::Write{8,16,32,64} is called within the test environment, | ||
| 51 | * a new write-record is made. | ||
| 52 | * @returns A vector of write records made since they were last cleared. | ||
| 53 | */ | ||
| 54 | std::vector<WriteRecord> GetWriteRecords() const; | ||
| 55 | |||
| 56 | /// Empties the internal write-record store. | ||
| 57 | void ClearWriteRecords(); | ||
| 58 | |||
| 59 | private: | ||
| 60 | friend struct TestMemory; | ||
| 61 | struct TestMemory final : Common::MemoryHook { | ||
| 62 | explicit TestMemory(TestEnvironment* env_) : env(env_) {} | ||
| 63 | TestEnvironment* env; | ||
| 64 | |||
| 65 | ~TestMemory() override; | ||
| 66 | |||
| 67 | std::optional<bool> IsValidAddress(VAddr addr) override; | ||
| 68 | |||
| 69 | std::optional<u8> Read8(VAddr addr) override; | ||
| 70 | std::optional<u16> Read16(VAddr addr) override; | ||
| 71 | std::optional<u32> Read32(VAddr addr) override; | ||
| 72 | std::optional<u64> Read64(VAddr addr) override; | ||
| 73 | |||
| 74 | bool ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) override; | ||
| 75 | |||
| 76 | bool Write8(VAddr addr, u8 data) override; | ||
| 77 | bool Write16(VAddr addr, u16 data) override; | ||
| 78 | bool Write32(VAddr addr, u32 data) override; | ||
| 79 | bool Write64(VAddr addr, u64 data) override; | ||
| 80 | |||
| 81 | bool WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size) override; | ||
| 82 | |||
| 83 | std::unordered_map<VAddr, u8> data; | ||
| 84 | }; | ||
| 85 | |||
| 86 | bool mutable_memory; | ||
| 87 | std::shared_ptr<TestMemory> test_memory; | ||
| 88 | std::vector<WriteRecord> write_records; | ||
| 89 | Common::PageTable* page_table = nullptr; | ||
| 90 | Kernel::KernelCore kernel; | ||
| 91 | }; | ||
| 92 | |||
| 93 | } // namespace ArmTests | ||
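[Editor's note] For context on the deleted harness: every write issued through the memory hooks was captured as a WriteRecord (width in bits, address, value), so tests could assert on the exact write traffic regardless of whether memory was mutable. A minimal usage sketch against the now-deleted API; the Catch2 macro, RunCodeUnderTest, and the concrete addresses are hypothetical:

    // Hypothetical test using the deleted ArmTests API (Catch2 assumed).
    ArmTests::TestEnvironment env{/*mutable_memory=*/true};
    env.SetMemory32(0x1000, 0xDEADBEEF);              // seed readable memory
    RunCodeUnderTest(env);                            // assumed to issue a Write8(0x2000, 0x42)
    const std::vector<ArmTests::WriteRecord> records = env.GetWriteRecords();
    REQUIRE(records.back() == ArmTests::WriteRecord{8, 0x2000, 0x42}); // 8-bit write recorded
    env.ClearWriteRecords();                          // reset between test sections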
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 5b73724ce..f7b9d7f86 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -25,6 +25,7 @@ add_library(video_core STATIC | |||
| 25 | command_classes/vic.h | 25 | command_classes/vic.h |
| 26 | compatible_formats.cpp | 26 | compatible_formats.cpp |
| 27 | compatible_formats.h | 27 | compatible_formats.h |
| 28 | delayed_destruction_ring.h | ||
| 28 | dirty_flags.cpp | 29 | dirty_flags.cpp |
| 29 | dirty_flags.h | 30 | dirty_flags.h |
| 30 | dma_pusher.cpp | 31 | dma_pusher.cpp |
| @@ -47,6 +48,7 @@ add_library(video_core STATIC | |||
| 47 | engines/shader_bytecode.h | 48 | engines/shader_bytecode.h |
| 48 | engines/shader_header.h | 49 | engines/shader_header.h |
| 49 | engines/shader_type.h | 50 | engines/shader_type.h |
| 51 | framebuffer_config.h | ||
| 50 | macro/macro.cpp | 52 | macro/macro.cpp |
| 51 | macro/macro.h | 53 | macro/macro.h |
| 52 | macro/macro_hle.cpp | 54 | macro/macro_hle.cpp |
| @@ -58,10 +60,6 @@ add_library(video_core STATIC | |||
| 58 | fence_manager.h | 60 | fence_manager.h |
| 59 | gpu.cpp | 61 | gpu.cpp |
| 60 | gpu.h | 62 | gpu.h |
| 61 | gpu_asynch.cpp | ||
| 62 | gpu_asynch.h | ||
| 63 | gpu_synch.cpp | ||
| 64 | gpu_synch.h | ||
| 65 | gpu_thread.cpp | 63 | gpu_thread.cpp |
| 66 | gpu_thread.h | 64 | gpu_thread.h |
| 67 | guest_driver.cpp | 65 | guest_driver.cpp |
| @@ -84,14 +82,10 @@ add_library(video_core STATIC | |||
| 84 | renderer_opengl/gl_device.h | 82 | renderer_opengl/gl_device.h |
| 85 | renderer_opengl/gl_fence_manager.cpp | 83 | renderer_opengl/gl_fence_manager.cpp |
| 86 | renderer_opengl/gl_fence_manager.h | 84 | renderer_opengl/gl_fence_manager.h |
| 87 | renderer_opengl/gl_framebuffer_cache.cpp | ||
| 88 | renderer_opengl/gl_framebuffer_cache.h | ||
| 89 | renderer_opengl/gl_rasterizer.cpp | 85 | renderer_opengl/gl_rasterizer.cpp |
| 90 | renderer_opengl/gl_rasterizer.h | 86 | renderer_opengl/gl_rasterizer.h |
| 91 | renderer_opengl/gl_resource_manager.cpp | 87 | renderer_opengl/gl_resource_manager.cpp |
| 92 | renderer_opengl/gl_resource_manager.h | 88 | renderer_opengl/gl_resource_manager.h |
| 93 | renderer_opengl/gl_sampler_cache.cpp | ||
| 94 | renderer_opengl/gl_sampler_cache.h | ||
| 95 | renderer_opengl/gl_shader_cache.cpp | 89 | renderer_opengl/gl_shader_cache.cpp |
| 96 | renderer_opengl/gl_shader_cache.h | 90 | renderer_opengl/gl_shader_cache.h |
| 97 | renderer_opengl/gl_shader_decompiler.cpp | 91 | renderer_opengl/gl_shader_decompiler.cpp |
| @@ -113,14 +107,14 @@ add_library(video_core STATIC | |||
| 113 | renderer_opengl/maxwell_to_gl.h | 107 | renderer_opengl/maxwell_to_gl.h |
| 114 | renderer_opengl/renderer_opengl.cpp | 108 | renderer_opengl/renderer_opengl.cpp |
| 115 | renderer_opengl/renderer_opengl.h | 109 | renderer_opengl/renderer_opengl.h |
| 116 | renderer_opengl/utils.cpp | 110 | renderer_opengl/util_shaders.cpp |
| 117 | renderer_opengl/utils.h | 111 | renderer_opengl/util_shaders.h |
| 112 | renderer_vulkan/blit_image.cpp | ||
| 113 | renderer_vulkan/blit_image.h | ||
| 118 | renderer_vulkan/fixed_pipeline_state.cpp | 114 | renderer_vulkan/fixed_pipeline_state.cpp |
| 119 | renderer_vulkan/fixed_pipeline_state.h | 115 | renderer_vulkan/fixed_pipeline_state.h |
| 120 | renderer_vulkan/maxwell_to_vk.cpp | 116 | renderer_vulkan/maxwell_to_vk.cpp |
| 121 | renderer_vulkan/maxwell_to_vk.h | 117 | renderer_vulkan/maxwell_to_vk.h |
| 122 | renderer_vulkan/nsight_aftermath_tracker.cpp | ||
| 123 | renderer_vulkan/nsight_aftermath_tracker.h | ||
| 124 | renderer_vulkan/renderer_vulkan.h | 118 | renderer_vulkan/renderer_vulkan.h |
| 125 | renderer_vulkan/renderer_vulkan.cpp | 119 | renderer_vulkan/renderer_vulkan.cpp |
| 126 | renderer_vulkan/vk_blit_screen.cpp | 120 | renderer_vulkan/vk_blit_screen.cpp |
| @@ -135,14 +129,10 @@ add_library(video_core STATIC | |||
| 135 | renderer_vulkan/vk_compute_pipeline.h | 129 | renderer_vulkan/vk_compute_pipeline.h |
| 136 | renderer_vulkan/vk_descriptor_pool.cpp | 130 | renderer_vulkan/vk_descriptor_pool.cpp |
| 137 | renderer_vulkan/vk_descriptor_pool.h | 131 | renderer_vulkan/vk_descriptor_pool.h |
| 138 | renderer_vulkan/vk_device.cpp | ||
| 139 | renderer_vulkan/vk_device.h | ||
| 140 | renderer_vulkan/vk_fence_manager.cpp | 132 | renderer_vulkan/vk_fence_manager.cpp |
| 141 | renderer_vulkan/vk_fence_manager.h | 133 | renderer_vulkan/vk_fence_manager.h |
| 142 | renderer_vulkan/vk_graphics_pipeline.cpp | 134 | renderer_vulkan/vk_graphics_pipeline.cpp |
| 143 | renderer_vulkan/vk_graphics_pipeline.h | 135 | renderer_vulkan/vk_graphics_pipeline.h |
| 144 | renderer_vulkan/vk_image.cpp | ||
| 145 | renderer_vulkan/vk_image.h | ||
| 146 | renderer_vulkan/vk_master_semaphore.cpp | 136 | renderer_vulkan/vk_master_semaphore.cpp |
| 147 | renderer_vulkan/vk_master_semaphore.h | 137 | renderer_vulkan/vk_master_semaphore.h |
| 148 | renderer_vulkan/vk_memory_manager.cpp | 138 | renderer_vulkan/vk_memory_manager.cpp |
| @@ -153,12 +143,8 @@ add_library(video_core STATIC | |||
| 153 | renderer_vulkan/vk_query_cache.h | 143 | renderer_vulkan/vk_query_cache.h |
| 154 | renderer_vulkan/vk_rasterizer.cpp | 144 | renderer_vulkan/vk_rasterizer.cpp |
| 155 | renderer_vulkan/vk_rasterizer.h | 145 | renderer_vulkan/vk_rasterizer.h |
| 156 | renderer_vulkan/vk_renderpass_cache.cpp | ||
| 157 | renderer_vulkan/vk_renderpass_cache.h | ||
| 158 | renderer_vulkan/vk_resource_pool.cpp | 146 | renderer_vulkan/vk_resource_pool.cpp |
| 159 | renderer_vulkan/vk_resource_pool.h | 147 | renderer_vulkan/vk_resource_pool.h |
| 160 | renderer_vulkan/vk_sampler_cache.cpp | ||
| 161 | renderer_vulkan/vk_sampler_cache.h | ||
| 162 | renderer_vulkan/vk_scheduler.cpp | 148 | renderer_vulkan/vk_scheduler.cpp |
| 163 | renderer_vulkan/vk_scheduler.h | 149 | renderer_vulkan/vk_scheduler.h |
| 164 | renderer_vulkan/vk_shader_decompiler.cpp | 150 | renderer_vulkan/vk_shader_decompiler.cpp |
| @@ -177,10 +163,6 @@ add_library(video_core STATIC | |||
| 177 | renderer_vulkan/vk_texture_cache.h | 163 | renderer_vulkan/vk_texture_cache.h |
| 178 | renderer_vulkan/vk_update_descriptor.cpp | 164 | renderer_vulkan/vk_update_descriptor.cpp |
| 179 | renderer_vulkan/vk_update_descriptor.h | 165 | renderer_vulkan/vk_update_descriptor.h |
| 180 | renderer_vulkan/wrapper.cpp | ||
| 181 | renderer_vulkan/wrapper.h | ||
| 182 | sampler_cache.cpp | ||
| 183 | sampler_cache.h | ||
| 184 | shader_cache.h | 166 | shader_cache.h |
| 185 | shader_notify.cpp | 167 | shader_notify.cpp |
| 186 | shader_notify.h | 168 | shader_notify.h |
| @@ -237,25 +219,52 @@ add_library(video_core STATIC | |||
| 237 | shader/transform_feedback.h | 219 | shader/transform_feedback.h |
| 238 | surface.cpp | 220 | surface.cpp |
| 239 | surface.h | 221 | surface.h |
| 222 | texture_cache/accelerated_swizzle.cpp | ||
| 223 | texture_cache/accelerated_swizzle.h | ||
| 224 | texture_cache/decode_bc4.cpp | ||
| 225 | texture_cache/decode_bc4.h | ||
| 226 | texture_cache/descriptor_table.h | ||
| 227 | texture_cache/formatter.cpp | ||
| 228 | texture_cache/formatter.h | ||
| 240 | texture_cache/format_lookup_table.cpp | 229 | texture_cache/format_lookup_table.cpp |
| 241 | texture_cache/format_lookup_table.h | 230 | texture_cache/format_lookup_table.h |
| 242 | texture_cache/surface_base.cpp | 231 | texture_cache/image_base.cpp |
| 243 | texture_cache/surface_base.h | 232 | texture_cache/image_base.h |
| 244 | texture_cache/surface_params.cpp | 233 | texture_cache/image_info.cpp |
| 245 | texture_cache/surface_params.h | 234 | texture_cache/image_info.h |
| 246 | texture_cache/surface_view.cpp | 235 | texture_cache/image_view_base.cpp |
| 247 | texture_cache/surface_view.h | 236 | texture_cache/image_view_base.h |
| 237 | texture_cache/image_view_info.cpp | ||
| 238 | texture_cache/image_view_info.h | ||
| 239 | texture_cache/render_targets.h | ||
| 240 | texture_cache/samples_helper.h | ||
| 241 | texture_cache/slot_vector.h | ||
| 248 | texture_cache/texture_cache.h | 242 | texture_cache/texture_cache.h |
| 243 | texture_cache/types.h | ||
| 244 | texture_cache/util.cpp | ||
| 245 | texture_cache/util.h | ||
| 249 | textures/astc.cpp | 246 | textures/astc.cpp |
| 250 | textures/astc.h | 247 | textures/astc.h |
| 251 | textures/convert.cpp | ||
| 252 | textures/convert.h | ||
| 253 | textures/decoders.cpp | 248 | textures/decoders.cpp |
| 254 | textures/decoders.h | 249 | textures/decoders.h |
| 255 | textures/texture.cpp | 250 | textures/texture.cpp |
| 256 | textures/texture.h | 251 | textures/texture.h |
| 257 | video_core.cpp | 252 | video_core.cpp |
| 258 | video_core.h | 253 | video_core.h |
| 254 | vulkan_common/vulkan_debug_callback.cpp | ||
| 255 | vulkan_common/vulkan_debug_callback.h | ||
| 256 | vulkan_common/vulkan_device.cpp | ||
| 257 | vulkan_common/vulkan_device.h | ||
| 258 | vulkan_common/vulkan_instance.cpp | ||
| 259 | vulkan_common/vulkan_instance.h | ||
| 260 | vulkan_common/vulkan_library.cpp | ||
| 261 | vulkan_common/vulkan_library.h | ||
| 262 | vulkan_common/vulkan_surface.cpp | ||
| 263 | vulkan_common/vulkan_surface.h | ||
| 264 | vulkan_common/vulkan_wrapper.cpp | ||
| 265 | vulkan_common/vulkan_wrapper.h | ||
| 266 | vulkan_common/nsight_aftermath_tracker.cpp | ||
| 267 | vulkan_common/nsight_aftermath_tracker.h | ||
| 259 | ) | 268 | ) |
| 260 | 269 | ||
| 261 | create_target_directory_groups(video_core) | 270 | create_target_directory_groups(video_core) |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 38961f3fd..83b9ee871 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -118,20 +118,17 @@ public: | |||
| 118 | /// Prepares the buffer cache for data uploading | 118 | /// Prepares the buffer cache for data uploading |
| 119 | /// @param max_size Maximum number of bytes that will be uploaded | 119 | /// @param max_size Maximum number of bytes that will be uploaded |
| 120 | /// @return True when a stream buffer invalidation was required, false otherwise | 120 | /// @return True when a stream buffer invalidation was required, false otherwise |
| 121 | bool Map(std::size_t max_size) { | 121 | void Map(std::size_t max_size) { |
| 122 | std::lock_guard lock{mutex}; | 122 | std::lock_guard lock{mutex}; |
| 123 | 123 | ||
| 124 | bool invalidated; | 124 | std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4); |
| 125 | std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); | ||
| 126 | buffer_offset = buffer_offset_base; | 125 | buffer_offset = buffer_offset_base; |
| 127 | |||
| 128 | return invalidated; | ||
| 129 | } | 126 | } |
| 130 | 127 | ||
| 131 | /// Finishes the upload stream | 128 | /// Finishes the upload stream |
| 132 | void Unmap() { | 129 | void Unmap() { |
| 133 | std::lock_guard lock{mutex}; | 130 | std::lock_guard lock{mutex}; |
| 134 | stream_buffer->Unmap(buffer_offset - buffer_offset_base); | 131 | stream_buffer.Unmap(buffer_offset - buffer_offset_base); |
| 135 | } | 132 | } |
| 136 | 133 | ||
| 137 | /// Function called at the end of each frame, intended for deferred operations | 134 |
| @@ -261,9 +258,9 @@ public: | |||
| 261 | protected: | 258 | protected: |
| 262 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | 259 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, |
| 263 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 260 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, |
| 264 | std::unique_ptr<StreamBuffer> stream_buffer_) | 261 | StreamBuffer& stream_buffer_) |
| 265 | : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, | 262 | : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, |
| 266 | stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {} | 263 | stream_buffer{stream_buffer_} {} |
| 267 | 264 | ||
| 268 | ~BufferCache() = default; | 265 | ~BufferCache() = default; |
| 269 | 266 | ||
| @@ -441,7 +438,7 @@ private: | |||
| 441 | 438 | ||
| 442 | buffer_ptr += size; | 439 | buffer_ptr += size; |
| 443 | buffer_offset += size; | 440 | buffer_offset += size; |
| 444 | return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()}; | 441 | return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()}; |
| 445 | } | 442 | } |
| 446 | 443 | ||
| 447 | void AlignBuffer(std::size_t alignment) { | 444 | void AlignBuffer(std::size_t alignment) { |
| @@ -567,9 +564,7 @@ private: | |||
| 567 | VideoCore::RasterizerInterface& rasterizer; | 564 | VideoCore::RasterizerInterface& rasterizer; |
| 568 | Tegra::MemoryManager& gpu_memory; | 565 | Tegra::MemoryManager& gpu_memory; |
| 569 | Core::Memory::Memory& cpu_memory; | 566 | Core::Memory::Memory& cpu_memory; |
| 570 | 567 | StreamBuffer& stream_buffer; | |
| 571 | std::unique_ptr<StreamBuffer> stream_buffer; | ||
| 572 | BufferType stream_buffer_handle; | ||
| 573 | 568 | ||
| 574 | u8* buffer_ptr = nullptr; | 569 | u8* buffer_ptr = nullptr; |
| 575 | u64 buffer_offset = 0; | 570 | u64 buffer_offset = 0; |
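[Editor's note] The cache now borrows the stream buffer by reference instead of owning it, and Map returns void because the new StreamBuffer::Map only hands back a pointer and a base offset (the "/// @return True when a stream buffer invalidation was required" comment kept above is now stale). A sketch of the per-frame upload contract the new interface implies; the surrounding loop is hypothetical:

    // Hypothetical frame loop around the Map/Unmap pair shown above.
    cache.Map(total_upload_size);  // locks, maps the stream buffer, resets buffer_offset
    // ... staging calls copy data through buffer_ptr and advance buffer_offset ...
    cache.Unmap();                 // flushes exactly buffer_offset - buffer_offset_base bytes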
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index e3e7432f7..94679d5d1 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp | |||
| @@ -33,8 +33,7 @@ CDmaPusher::CDmaPusher(GPU& gpu_) | |||
| 33 | : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)), | 33 | : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)), |
| 34 | vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)), | 34 | vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)), |
| 35 | host1x_processor(std::make_unique<Host1x>(gpu)), | 35 | host1x_processor(std::make_unique<Host1x>(gpu)), |
| 36 | nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)), | 36 | sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {} |
| 37 | vic_sync(std::make_unique<SyncptIncrManager>(gpu)) {} | ||
| 38 | 37 | ||
| 39 | CDmaPusher::~CDmaPusher() = default; | 38 | CDmaPusher::~CDmaPusher() = default; |
| 40 | 39 | ||
| @@ -110,10 +109,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { | |||
| 110 | const auto syncpoint_id = static_cast<u32>(data & 0xFF); | 109 | const auto syncpoint_id = static_cast<u32>(data & 0xFF); |
| 111 | const auto cond = static_cast<u32>((data >> 8) & 0xFF); | 110 | const auto cond = static_cast<u32>((data >> 8) & 0xFF); |
| 112 | if (cond == 0) { | 111 | if (cond == 0) { |
| 113 | nvdec_sync->Increment(syncpoint_id); | 112 | sync_manager->Increment(syncpoint_id); |
| 114 | } else { | 113 | } else { |
| 115 | nvdec_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id); | 114 | sync_manager->SignalDone( |
| 116 | nvdec_sync->SignalDone(syncpoint_id); | 115 | sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id)); |
| 117 | } | 116 | } |
| 118 | break; | 117 | break; |
| 119 | } | 118 | } |
| @@ -135,10 +134,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { | |||
| 135 | const auto syncpoint_id = static_cast<u32>(data & 0xFF); | 134 | const auto syncpoint_id = static_cast<u32>(data & 0xFF); |
| 136 | const auto cond = static_cast<u32>((data >> 8) & 0xFF); | 135 | const auto cond = static_cast<u32>((data >> 8) & 0xFF); |
| 137 | if (cond == 0) { | 136 | if (cond == 0) { |
| 138 | vic_sync->Increment(syncpoint_id); | 137 | sync_manager->Increment(syncpoint_id); |
| 139 | } else { | 138 | } else { |
| 140 | vic_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id); | 139 | sync_manager->SignalDone( |
| 141 | vic_sync->SignalDone(syncpoint_id); | 140 | sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id)); |
| 142 | } | 141 | } |
| 143 | break; | 142 | break; |
| 144 | } | 143 | } |
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 0db1cd646..8ca70b6dd 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h | |||
| @@ -116,12 +116,10 @@ private: | |||
| 116 | void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments); | 116 | void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments); |
| 117 | 117 | ||
| 118 | GPU& gpu; | 118 | GPU& gpu; |
| 119 | 119 | std::shared_ptr<Tegra::Nvdec> nvdec_processor; | |
| 120 | std::shared_ptr<Nvdec> nvdec_processor; | 120 | std::unique_ptr<Tegra::Vic> vic_processor; |
| 121 | std::unique_ptr<Vic> vic_processor; | 121 | std::unique_ptr<Tegra::Host1x> host1x_processor; |
| 122 | std::unique_ptr<Host1x> host1x_processor; | 122 | std::unique_ptr<SyncptIncrManager> sync_manager; |
| 123 | std::unique_ptr<SyncptIncrManager> nvdec_sync; | ||
| 124 | std::unique_ptr<SyncptIncrManager> vic_sync; | ||
| 125 | ChClassId current_class{}; | 123 | ChClassId current_class{}; |
| 126 | ThiRegisters vic_thi_state{}; | 124 | ThiRegisters vic_thi_state{}; |
| 127 | ThiRegisters nvdec_thi_state{}; | 125 | ThiRegisters nvdec_thi_state{}; |
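[Editor's note] Beyond merging the two syncpoint managers into one, the conditional path above now chains the calls so SignalDone receives the increment handle returned by IncrementWhenDone, where the old code passed it the raw syncpoint id. Spelled out, with the contract inferred from the call shape in the diff:

    // Deferred increment: IncrementWhenDone registers the pending increment and
    // returns a handle; SignalDone completes that specific pending increment.
    const u32 handle = sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id);
    sync_manager->SignalDone(handle);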
diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp index c4dd4881a..b12494528 100644 --- a/src/video_core/command_classes/host1x.cpp +++ b/src/video_core/command_classes/host1x.cpp | |||
| @@ -10,22 +10,14 @@ Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {} | |||
| 10 | 10 | ||
| 11 | Tegra::Host1x::~Host1x() = default; | 11 | Tegra::Host1x::~Host1x() = default; |
| 12 | 12 | ||
| 13 | void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) { | 13 | void Tegra::Host1x::ProcessMethod(Method method, u32 argument) { |
| 14 | u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u32); | ||
| 15 | std::memcpy(state_offset, &arguments, sizeof(u32)); | ||
| 16 | } | ||
| 17 | |||
| 18 | void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& arguments) { | ||
| 19 | StateWrite(static_cast<u32>(method), arguments[0]); | ||
| 20 | switch (method) { | 14 | switch (method) { |
| 21 | case Method::WaitSyncpt: | ||
| 22 | Execute(arguments[0]); | ||
| 23 | break; | ||
| 24 | case Method::LoadSyncptPayload32: | 15 | case Method::LoadSyncptPayload32: |
| 25 | syncpoint_value = arguments[0]; | 16 | syncpoint_value = argument; |
| 26 | break; | 17 | break; |
| 18 | case Method::WaitSyncpt: | ||
| 27 | case Method::WaitSyncpt32: | 19 | case Method::WaitSyncpt32: |
| 28 | Execute(arguments[0]); | 20 | Execute(argument); |
| 29 | break; | 21 | break; |
| 30 | default: | 22 | default: |
| 31 | UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method)); | 23 | UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method)); |
| @@ -34,6 +26,5 @@ void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& argumen | |||
| 34 | } | 26 | } |
| 35 | 27 | ||
| 36 | void Tegra::Host1x::Execute(u32 data) { | 28 | void Tegra::Host1x::Execute(u32 data) { |
| 37 | // This method waits on a valid syncpoint. | 29 | gpu.WaitFence(data, syncpoint_value); |
| 38 | // TODO: Implement when proper Async is in place | ||
| 39 | } | 30 | } |
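[Editor's note] The Host1x stub is now functional: LoadSyncptPayload32 latches the expected payload, and the wait methods block through GPU::WaitFence until the syncpoint reaches it. A condensed guest-side sequence the handler services; the concrete ids and values are hypothetical:

    // Hypothetical command sequence (syncpoint 5, payload 42).
    host1x.ProcessMethod(Tegra::Host1x::Method::LoadSyncptPayload32, 42); // latch payload
    host1x.ProcessMethod(Tegra::Host1x::Method::WaitSyncpt32, 5);         // block until syncpoint 5 reaches 42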
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h index 013eaa0c1..7e94799dd 100644 --- a/src/video_core/command_classes/host1x.h +++ b/src/video_core/command_classes/host1x.h | |||
| @@ -14,64 +14,23 @@ class Nvdec; | |||
| 14 | 14 | ||
| 15 | class Host1x { | 15 | class Host1x { |
| 16 | public: | 16 | public: |
| 17 | struct Host1xClassRegisters { | ||
| 18 | u32 incr_syncpt{}; | ||
| 19 | u32 incr_syncpt_ctrl{}; | ||
| 20 | u32 incr_syncpt_error{}; | ||
| 21 | INSERT_PADDING_WORDS(5); | ||
| 22 | u32 wait_syncpt{}; | ||
| 23 | u32 wait_syncpt_base{}; | ||
| 24 | u32 wait_syncpt_incr{}; | ||
| 25 | u32 load_syncpt_base{}; | ||
| 26 | u32 incr_syncpt_base{}; | ||
| 27 | u32 clear{}; | ||
| 28 | u32 wait{}; | ||
| 29 | u32 wait_with_interrupt{}; | ||
| 30 | u32 delay_use{}; | ||
| 31 | u32 tick_count_high{}; | ||
| 32 | u32 tick_count_low{}; | ||
| 33 | u32 tick_ctrl{}; | ||
| 34 | INSERT_PADDING_WORDS(23); | ||
| 35 | u32 ind_ctrl{}; | ||
| 36 | u32 ind_off2{}; | ||
| 37 | u32 ind_off{}; | ||
| 38 | std::array<u32, 31> ind_data{}; | ||
| 39 | INSERT_PADDING_WORDS(1); | ||
| 40 | u32 load_syncpoint_payload32{}; | ||
| 41 | u32 stall_ctrl{}; | ||
| 42 | u32 wait_syncpt32{}; | ||
| 43 | u32 wait_syncpt_base32{}; | ||
| 44 | u32 load_syncpt_base32{}; | ||
| 45 | u32 incr_syncpt_base32{}; | ||
| 46 | u32 stall_count_high{}; | ||
| 47 | u32 stall_count_low{}; | ||
| 48 | u32 xref_ctrl{}; | ||
| 49 | u32 channel_xref_high{}; | ||
| 50 | u32 channel_xref_low{}; | ||
| 51 | }; | ||
| 52 | static_assert(sizeof(Host1xClassRegisters) == 0x164, "Host1xClassRegisters is an invalid size"); | ||
| 53 | |||
| 54 | enum class Method : u32 { | 17 | enum class Method : u32 { |
| 55 | WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4, | 18 | WaitSyncpt = 0x8, |
| 56 | LoadSyncptPayload32 = offsetof(Host1xClassRegisters, load_syncpoint_payload32) / 4, | 19 | LoadSyncptPayload32 = 0x4e, |
| 57 | WaitSyncpt32 = offsetof(Host1xClassRegisters, wait_syncpt32) / 4, | 20 | WaitSyncpt32 = 0x50, |
| 58 | }; | 21 | }; |
| 59 | 22 | ||
| 60 | explicit Host1x(GPU& gpu); | 23 | explicit Host1x(GPU& gpu); |
| 61 | ~Host1x(); | 24 | ~Host1x(); |
| 62 | 25 | ||
| 63 | /// Writes the method into the state; invokes Execute() if encountered | 26 | /// Writes the method into the state; invokes Execute() if encountered |
| 64 | void ProcessMethod(Method method, const std::vector<u32>& arguments); | 27 | void ProcessMethod(Method method, u32 argument); |
| 65 | 28 | ||
| 66 | private: | 29 | private: |
| 67 | /// For Host1x, execute is waiting on a syncpoint previously written into the state | 30 | /// For Host1x, execute is waiting on a syncpoint previously written into the state |
| 68 | void Execute(u32 data); | 31 | void Execute(u32 data); |
| 69 | 32 | ||
| 70 | /// Write argument into the provided offset | ||
| 71 | void StateWrite(u32 offset, u32 arguments); | ||
| 72 | |||
| 73 | u32 syncpoint_value{}; | 33 | u32 syncpoint_value{}; |
| 74 | Host1xClassRegisters state{}; | ||
| 75 | GPU& gpu; | 34 | GPU& gpu; |
| 76 | }; | 35 | }; |
| 77 | 36 | ||
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 66e21ce9c..55e632346 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 10 | #include "video_core/gpu.h" | 10 | #include "video_core/gpu.h" |
| 11 | #include "video_core/memory_manager.h" | 11 | #include "video_core/memory_manager.h" |
| 12 | #include "video_core/texture_cache/surface_params.h" | 12 | #include "video_core/textures/decoders.h" |
| 13 | 13 | ||
| 14 | extern "C" { | 14 | extern "C" { |
| 15 | #include <libswscale/swscale.h> | 15 | #include <libswscale/swscale.h> |
| @@ -53,7 +53,7 @@ void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) { | |||
| 53 | 53 | ||
| 54 | void Vic::Execute() { | 54 | void Vic::Execute() { |
| 55 | if (output_surface_luma_address == 0) { | 55 | if (output_surface_luma_address == 0) { |
| 56 | LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Recieved 0x{:X}", | 56 | LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Received 0x{:X}", |
| 57 | vic_state.output_surface.luma_offset); | 57 | vic_state.output_surface.luma_offset); |
| 58 | return; | 58 | return; |
| 59 | } | 59 | } |
| @@ -105,9 +105,9 @@ void Vic::Execute() { | |||
| 105 | const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, | 105 | const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, |
| 106 | block_height, 0); | 106 | block_height, 0); |
| 107 | std::vector<u8> swizzled_data(size); | 107 | std::vector<u8> swizzled_data(size); |
| 108 | Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4, | 108 | Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4, |
| 109 | swizzled_data.data(), converted_frame_buffer.get(), | 109 | frame->width, 4, swizzled_data.data(), |
| 110 | false, block_height, 0, 1); | 110 | converted_frame_buffer.get(), block_height, 0, 0); |
| 111 | 111 | ||
| 112 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); | 112 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); |
| 113 | gpu.Maxwell3D().OnMemoryWrite(); | 113 | gpu.Maxwell3D().OnMemoryWrite(); |
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp index b06c32c84..acf2668dc 100644 --- a/src/video_core/compatible_formats.cpp +++ b/src/video_core/compatible_formats.cpp | |||
| @@ -3,33 +3,33 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | 5 | #include <array> |
| 6 | #include <bitset> | ||
| 7 | #include <cstddef> | 6 | #include <cstddef> |
| 8 | 7 | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/compatible_formats.h" | 9 | #include "video_core/compatible_formats.h" |
| 10 | #include "video_core/surface.h" | 10 | #include "video_core/surface.h" |
| 11 | 11 | ||
| 12 | namespace VideoCore::Surface { | 12 | namespace VideoCore::Surface { |
| 13 | |||
| 14 | namespace { | 13 | namespace { |
| 14 | using Table = std::array<std::array<u64, 2>, MaxPixelFormat>; | ||
| 15 | 15 | ||
| 16 | // Compatibility table taken from Table 3.X.2 in: | 16 | // Compatibility table taken from Table 3.X.2 in: |
| 17 | // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt | 17 | // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt |
| 18 | 18 | ||
| 19 | constexpr std::array VIEW_CLASS_128_BITS = { | 19 | constexpr std::array VIEW_CLASS_128_BITS{ |
| 20 | PixelFormat::R32G32B32A32_FLOAT, | 20 | PixelFormat::R32G32B32A32_FLOAT, |
| 21 | PixelFormat::R32G32B32A32_UINT, | 21 | PixelFormat::R32G32B32A32_UINT, |
| 22 | PixelFormat::R32G32B32A32_SINT, | 22 | PixelFormat::R32G32B32A32_SINT, |
| 23 | }; | 23 | }; |
| 24 | 24 | ||
| 25 | constexpr std::array VIEW_CLASS_96_BITS = { | 25 | constexpr std::array VIEW_CLASS_96_BITS{ |
| 26 | PixelFormat::R32G32B32_FLOAT, | 26 | PixelFormat::R32G32B32_FLOAT, |
| 27 | }; | 27 | }; |
| 28 | // Missing formats: | 28 | // Missing formats: |
| 29 | // PixelFormat::RGB32UI, | 29 | // PixelFormat::RGB32UI, |
| 30 | // PixelFormat::RGB32I, | 30 | // PixelFormat::RGB32I, |
| 31 | 31 | ||
| 32 | constexpr std::array VIEW_CLASS_64_BITS = { | 32 | constexpr std::array VIEW_CLASS_64_BITS{ |
| 33 | PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT, | 33 | PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT, |
| 34 | PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT, | 34 | PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT, |
| 35 | PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, | 35 | PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, |
| @@ -38,7 +38,7 @@ constexpr std::array VIEW_CLASS_64_BITS = { | |||
| 38 | 38 | ||
| 39 | // TODO: How should we handle 48 bits? | 39 | // TODO: How should we handle 48 bits? |
| 40 | 40 | ||
| 41 | constexpr std::array VIEW_CLASS_32_BITS = { | 41 | constexpr std::array VIEW_CLASS_32_BITS{ |
| 42 | PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT, | 42 | PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT, |
| 43 | PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT, | 43 | PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT, |
| 44 | PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM, | 44 | PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM, |
| @@ -50,43 +50,105 @@ constexpr std::array VIEW_CLASS_32_BITS = { | |||
| 50 | 50 | ||
| 51 | // TODO: How should we handle 24 bits? | 51 | // TODO: How should we handle 24 bits? |
| 52 | 52 | ||
| 53 | constexpr std::array VIEW_CLASS_16_BITS = { | 53 | constexpr std::array VIEW_CLASS_16_BITS{ |
| 54 | PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT, | 54 | PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT, |
| 55 | PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM, | 55 | PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM, |
| 56 | PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT, | 56 | PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT, |
| 57 | }; | 57 | }; |
| 58 | 58 | ||
| 59 | constexpr std::array VIEW_CLASS_8_BITS = { | 59 | constexpr std::array VIEW_CLASS_8_BITS{ |
| 60 | PixelFormat::R8_UINT, | 60 | PixelFormat::R8_UINT, |
| 61 | PixelFormat::R8_UNORM, | 61 | PixelFormat::R8_UNORM, |
| 62 | PixelFormat::R8_SINT, | 62 | PixelFormat::R8_SINT, |
| 63 | PixelFormat::R8_SNORM, | 63 | PixelFormat::R8_SNORM, |
| 64 | }; | 64 | }; |
| 65 | 65 | ||
| 66 | constexpr std::array VIEW_CLASS_RGTC1_RED = { | 66 | constexpr std::array VIEW_CLASS_RGTC1_RED{ |
| 67 | PixelFormat::BC4_UNORM, | 67 | PixelFormat::BC4_UNORM, |
| 68 | PixelFormat::BC4_SNORM, | 68 | PixelFormat::BC4_SNORM, |
| 69 | }; | 69 | }; |
| 70 | 70 | ||
| 71 | constexpr std::array VIEW_CLASS_RGTC2_RG = { | 71 | constexpr std::array VIEW_CLASS_RGTC2_RG{ |
| 72 | PixelFormat::BC5_UNORM, | 72 | PixelFormat::BC5_UNORM, |
| 73 | PixelFormat::BC5_SNORM, | 73 | PixelFormat::BC5_SNORM, |
| 74 | }; | 74 | }; |
| 75 | 75 | ||
| 76 | constexpr std::array VIEW_CLASS_BPTC_UNORM = { | 76 | constexpr std::array VIEW_CLASS_BPTC_UNORM{ |
| 77 | PixelFormat::BC7_UNORM, | 77 | PixelFormat::BC7_UNORM, |
| 78 | PixelFormat::BC7_SRGB, | 78 | PixelFormat::BC7_SRGB, |
| 79 | }; | 79 | }; |
| 80 | 80 | ||
| 81 | constexpr std::array VIEW_CLASS_BPTC_FLOAT = { | 81 | constexpr std::array VIEW_CLASS_BPTC_FLOAT{ |
| 82 | PixelFormat::BC6H_SFLOAT, | 82 | PixelFormat::BC6H_SFLOAT, |
| 83 | PixelFormat::BC6H_UFLOAT, | 83 | PixelFormat::BC6H_UFLOAT, |
| 84 | }; | 84 | }; |
| 85 | 85 | ||
| 86 | constexpr std::array VIEW_CLASS_ASTC_4x4_RGBA{ | ||
| 87 | PixelFormat::ASTC_2D_4X4_UNORM, | ||
| 88 | PixelFormat::ASTC_2D_4X4_SRGB, | ||
| 89 | }; | ||
| 90 | |||
| 91 | constexpr std::array VIEW_CLASS_ASTC_5x4_RGBA{ | ||
| 92 | PixelFormat::ASTC_2D_5X4_UNORM, | ||
| 93 | PixelFormat::ASTC_2D_5X4_SRGB, | ||
| 94 | }; | ||
| 95 | |||
| 96 | constexpr std::array VIEW_CLASS_ASTC_5x5_RGBA{ | ||
| 97 | PixelFormat::ASTC_2D_5X5_UNORM, | ||
| 98 | PixelFormat::ASTC_2D_5X5_SRGB, | ||
| 99 | }; | ||
| 100 | |||
| 101 | constexpr std::array VIEW_CLASS_ASTC_6x5_RGBA{ | ||
| 102 | PixelFormat::ASTC_2D_6X5_UNORM, | ||
| 103 | PixelFormat::ASTC_2D_6X5_SRGB, | ||
| 104 | }; | ||
| 105 | |||
| 106 | constexpr std::array VIEW_CLASS_ASTC_6x6_RGBA{ | ||
| 107 | PixelFormat::ASTC_2D_6X6_UNORM, | ||
| 108 | PixelFormat::ASTC_2D_6X6_SRGB, | ||
| 109 | }; | ||
| 110 | |||
| 111 | constexpr std::array VIEW_CLASS_ASTC_8x5_RGBA{ | ||
| 112 | PixelFormat::ASTC_2D_8X5_UNORM, | ||
| 113 | PixelFormat::ASTC_2D_8X5_SRGB, | ||
| 114 | }; | ||
| 115 | |||
| 116 | constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{ | ||
| 117 | PixelFormat::ASTC_2D_8X8_UNORM, | ||
| 118 | PixelFormat::ASTC_2D_8X8_SRGB, | ||
| 119 | }; | ||
| 120 | |||
| 121 | // Missing formats: | ||
| 122 | // PixelFormat::ASTC_2D_10X5_UNORM | ||
| 123 | // PixelFormat::ASTC_2D_10X5_SRGB | ||
| 124 | |||
| 125 | // Missing formats: | ||
| 126 | // PixelFormat::ASTC_2D_10X6_UNORM | ||
| 127 | // PixelFormat::ASTC_2D_10X6_SRGB | ||
| 128 | |||
| 129 | constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{ | ||
| 130 | PixelFormat::ASTC_2D_10X8_UNORM, | ||
| 131 | PixelFormat::ASTC_2D_10X8_SRGB, | ||
| 132 | }; | ||
| 133 | |||
| 134 | constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{ | ||
| 135 | PixelFormat::ASTC_2D_10X10_UNORM, | ||
| 136 | PixelFormat::ASTC_2D_10X10_SRGB, | ||
| 137 | }; | ||
| 138 | |||
| 139 | // Missing formats | ||
| 140 | // ASTC_2D_12X10_UNORM, | ||
| 141 | // ASTC_2D_12X10_SRGB, | ||
| 142 | |||
| 143 | constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{ | ||
| 144 | PixelFormat::ASTC_2D_12X12_UNORM, | ||
| 145 | PixelFormat::ASTC_2D_12X12_SRGB, | ||
| 146 | }; | ||
| 147 | |||
| 86 | // Compatibility table taken from Table 4.X.1 in: | 148 | // Compatibility table taken from Table 4.X.1 in: |
| 87 | // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt | 149 | // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt |
| 88 | 150 | ||
| 89 | constexpr std::array COPY_CLASS_128_BITS = { | 151 | constexpr std::array COPY_CLASS_128_BITS{ |
| 90 | PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT, | 152 | PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT, |
| 91 | PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM, | 153 | PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM, |
| 92 | PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM, | 154 | PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM, |
| @@ -97,7 +159,7 @@ constexpr std::array COPY_CLASS_128_BITS = { | |||
| 97 | // PixelFormat::RGBA32I | 159 | // PixelFormat::RGBA32I |
| 98 | // COMPRESSED_RG_RGTC2 | 160 | // COMPRESSED_RG_RGTC2 |
| 99 | 161 | ||
| 100 | constexpr std::array COPY_CLASS_64_BITS = { | 162 | constexpr std::array COPY_CLASS_64_BITS{ |
| 101 | PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT, | 163 | PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT, |
| 102 | PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, | 164 | PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, |
| 103 | PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT, | 165 | PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT, |
| @@ -110,32 +172,36 @@ constexpr std::array COPY_CLASS_64_BITS = { | |||
| 110 | // COMPRESSED_RGBA_S3TC_DXT1_EXT | 172 | // COMPRESSED_RGBA_S3TC_DXT1_EXT |
| 111 | // COMPRESSED_SIGNED_RED_RGTC1 | 173 | // COMPRESSED_SIGNED_RED_RGTC1 |
| 112 | 174 | ||
| 113 | void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) { | 175 | constexpr void Enable(Table& table, size_t format_a, size_t format_b) { |
| 114 | compatiblity[format_a][format_b] = true; | 176 | table[format_a][format_b / 64] |= u64(1) << (format_b % 64); |
| 115 | compatiblity[format_b][format_a] = true; | 177 | table[format_b][format_a / 64] |= u64(1) << (format_a % 64); |
| 116 | } | 178 | } |
| 117 | 179 | ||
| 118 | void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) { | 180 | constexpr void Enable(Table& table, PixelFormat format_a, PixelFormat format_b) { |
| 119 | Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b)); | 181 | Enable(table, static_cast<size_t>(format_a), static_cast<size_t>(format_b)); |
| 120 | } | 182 | } |
| 121 | 183 | ||
| 122 | template <typename Range> | 184 | template <typename Range> |
| 123 | void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) { | 185 | constexpr void EnableRange(Table& table, const Range& range) { |
| 124 | for (auto it_a = range.begin(); it_a != range.end(); ++it_a) { | 186 | for (auto it_a = range.begin(); it_a != range.end(); ++it_a) { |
| 125 | for (auto it_b = it_a; it_b != range.end(); ++it_b) { | 187 | for (auto it_b = it_a; it_b != range.end(); ++it_b) { |
| 126 | Enable(compatibility, *it_a, *it_b); | 188 | Enable(table, *it_a, *it_b); |
| 127 | } | 189 | } |
| 128 | } | 190 | } |
| 129 | } | 191 | } |
| 130 | 192 | ||
| 131 | } // Anonymous namespace | 193 | constexpr bool IsSupported(const Table& table, PixelFormat format_a, PixelFormat format_b) { |
| 194 | const size_t a = static_cast<size_t>(format_a); | ||
| 195 | const size_t b = static_cast<size_t>(format_b); | ||
| 196 | return ((table[a][b / 64] >> (b % 64)) & 1) != 0; | ||
| 197 | } | ||
| 132 | 198 | ||
| 133 | FormatCompatibility::FormatCompatibility() { | 199 | constexpr Table MakeViewTable() { |
| 200 | Table view{}; | ||
| 134 | for (size_t i = 0; i < MaxPixelFormat; ++i) { | 201 | for (size_t i = 0; i < MaxPixelFormat; ++i) { |
| 135 | // Identity is allowed | 202 | // Identity is allowed |
| 136 | Enable(view, i, i); | 203 | Enable(view, i, i); |
| 137 | } | 204 | } |
| 138 | |||
| 139 | EnableRange(view, VIEW_CLASS_128_BITS); | 205 | EnableRange(view, VIEW_CLASS_128_BITS); |
| 140 | EnableRange(view, VIEW_CLASS_96_BITS); | 206 | EnableRange(view, VIEW_CLASS_96_BITS); |
| 141 | EnableRange(view, VIEW_CLASS_64_BITS); | 207 | EnableRange(view, VIEW_CLASS_64_BITS); |
| @@ -146,10 +212,39 @@ FormatCompatibility::FormatCompatibility() { | |||
| 146 | EnableRange(view, VIEW_CLASS_RGTC2_RG); | 212 | EnableRange(view, VIEW_CLASS_RGTC2_RG); |
| 147 | EnableRange(view, VIEW_CLASS_BPTC_UNORM); | 213 | EnableRange(view, VIEW_CLASS_BPTC_UNORM); |
| 148 | EnableRange(view, VIEW_CLASS_BPTC_FLOAT); | 214 | EnableRange(view, VIEW_CLASS_BPTC_FLOAT); |
| 215 | EnableRange(view, VIEW_CLASS_ASTC_4x4_RGBA); | ||
| 216 | EnableRange(view, VIEW_CLASS_ASTC_5x4_RGBA); | ||
| 217 | EnableRange(view, VIEW_CLASS_ASTC_5x5_RGBA); | ||
| 218 | EnableRange(view, VIEW_CLASS_ASTC_6x5_RGBA); | ||
| 219 | EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA); | ||
| 220 | EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA); | ||
| 221 | EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA); | ||
| 222 | EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA); | ||
| 223 | EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA); | ||
| 224 | EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA); | ||
| 225 | return view; | ||
| 226 | } | ||
| 149 | 227 | ||
| 150 | copy = view; | 228 | constexpr Table MakeCopyTable() { |
| 229 | Table copy = MakeViewTable(); | ||
| 151 | EnableRange(copy, COPY_CLASS_128_BITS); | 230 | EnableRange(copy, COPY_CLASS_128_BITS); |
| 152 | EnableRange(copy, COPY_CLASS_64_BITS); | 231 | EnableRange(copy, COPY_CLASS_64_BITS); |
| 232 | return copy; | ||
| 233 | } | ||
| 234 | } // Anonymous namespace | ||
| 235 | |||
| 236 | bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views) { | ||
| 237 | if (broken_views) { | ||
| 238 | // If format views are broken, only accept formats that are identical. | ||
| 239 | return format_a == format_b; | ||
| 240 | } | ||
| 241 | static constexpr Table TABLE = MakeViewTable(); | ||
| 242 | return IsSupported(TABLE, format_a, format_b); | ||
| 243 | } | ||
| 244 | |||
| 245 | bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) { | ||
| 246 | static constexpr Table TABLE = MakeCopyTable(); | ||
| 247 | return IsSupported(TABLE, format_a, format_b); | ||
| 153 | } | 248 | } |
| 154 | 249 | ||
| 155 | } // namespace VideoCore::Surface | 250 | } // namespace VideoCore::Surface |
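[Editor's note] The rewrite replaces the runtime-built std::bitset table with rows of two u64 words filled entirely at compile time: for a format index b, the bit lives in word b / 64 at position b % 64, so two words per row cover MaxPixelFormat (at most 128) formats. Usage after the change, with formats taken from the view and copy classes listed above:

    // Both formats are in VIEW_CLASS_32_BITS, so this returns true
    // (unless broken_views forces identity-only matching).
    using namespace VideoCore::Surface;
    const bool view_ok = IsViewCompatible(PixelFormat::R32_FLOAT, PixelFormat::R32_UINT, false);
    // Both formats are in COPY_CLASS_128_BITS, so this returns true.
    const bool copy_ok = IsCopyCompatible(PixelFormat::BC2_UNORM, PixelFormat::R32G32B32A32_UINT);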
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h index 51766349b..9a0522988 100644 --- a/src/video_core/compatible_formats.h +++ b/src/video_core/compatible_formats.h | |||
| @@ -4,31 +4,12 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <bitset> | ||
| 9 | #include <cstddef> | ||
| 10 | |||
| 11 | #include "video_core/surface.h" | 7 | #include "video_core/surface.h" |
| 12 | 8 | ||
| 13 | namespace VideoCore::Surface { | 9 | namespace VideoCore::Surface { |
| 14 | 10 | ||
| 15 | class FormatCompatibility { | 11 | bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views); |
| 16 | public: | ||
| 17 | using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>; | ||
| 18 | |||
| 19 | explicit FormatCompatibility(); | ||
| 20 | |||
| 21 | bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept { | ||
| 22 | return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)]; | ||
| 23 | } | ||
| 24 | |||
| 25 | bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept { | ||
| 26 | return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)]; | ||
| 27 | } | ||
| 28 | 12 | ||
| 29 | private: | 13 | bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b); |
| 30 | Table view; | ||
| 31 | Table copy; | ||
| 32 | }; | ||
| 33 | 14 | ||
| 34 | } // namespace VideoCore::Surface | 15 | } // namespace VideoCore::Surface |
diff --git a/src/video_core/delayed_destruction_ring.h b/src/video_core/delayed_destruction_ring.h new file mode 100644 index 000000000..4f1d29c04 --- /dev/null +++ b/src/video_core/delayed_destruction_ring.h | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | /// Container to push objects to be destroyed a few ticks in the future | ||
| 15 | template <typename T, size_t TICKS_TO_DESTROY> | ||
| 16 | class DelayedDestructionRing { | ||
| 17 | public: | ||
| 18 | void Tick() { | ||
| 19 | index = (index + 1) % TICKS_TO_DESTROY; | ||
| 20 | elements[index].clear(); | ||
| 21 | } | ||
| 22 | |||
| 23 | void Push(T&& object) { | ||
| 24 | elements[index].push_back(std::move(object)); | ||
| 25 | } | ||
| 26 | |||
| 27 | private: | ||
| 28 | size_t index = 0; | ||
| 29 | std::array<std::vector<T>, TICKS_TO_DESTROY> elements; | ||
| 30 | }; | ||
| 31 | |||
| 32 | } // namespace VideoCommon | ||
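[Editor's note] The new ring implements deferred destruction: an object pushed during the current tick stays alive until Tick() has advanced TICKS_TO_DESTROY times and wraps back to its slot, at which point clear() destroys the whole batch. A minimal usage sketch; StagingBuffer and the frame hook are hypothetical:

    // Keep retired staging buffers alive for 8 frames so in-flight GPU work
    // can still reference them.
    VideoCommon::DelayedDestructionRing<StagingBuffer, 8> deferred;

    void OnFrameEnd(StagingBuffer&& retired) {
        deferred.Push(std::move(retired)); // destroyed ~8 Tick() calls from now
        deferred.Tick();                   // advance and destroy the oldest batch
    }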
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index 2faa6ef0e..b1eaac00c 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp | |||
| @@ -16,6 +16,9 @@ namespace VideoCommon::Dirty { | |||
| 16 | using Tegra::Engines::Maxwell3D; | 16 | using Tegra::Engines::Maxwell3D; |
| 17 | 17 | ||
| 18 | void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { | 18 | void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { |
| 19 | FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors); | ||
| 20 | FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors); | ||
| 21 | |||
| 19 | static constexpr std::size_t num_per_rt = NUM(rt[0]); | 22 | static constexpr std::size_t num_per_rt = NUM(rt[0]); |
| 20 | static constexpr std::size_t begin = OFF(rt); | 23 | static constexpr std::size_t begin = OFF(rt); |
| 21 | static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; | 24 | static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; |
| @@ -23,6 +26,10 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl | |||
| 23 | FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt); | 26 | FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt); |
| 24 | } | 27 | } |
| 25 | FillBlock(tables[1], begin, num, RenderTargets); | 28 | FillBlock(tables[1], begin, num, RenderTargets); |
| 29 | FillBlock(tables[0], OFF(render_area), NUM(render_area), RenderTargets); | ||
| 30 | |||
| 31 | tables[0][OFF(rt_control)] = RenderTargets; | ||
| 32 | tables[1][OFF(rt_control)] = RenderTargetControl; | ||
| 26 | 33 | ||
| 27 | static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets}; | 34 | static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets}; |
| 28 | for (std::size_t i = 0; i < std::size(zeta_flags); ++i) { | 35 | for (std::size_t i = 0; i < std::size(zeta_flags); ++i) { |
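[Editor's note] Each dirty table maps a Maxwell3D register index to a flag, and writing any register in a filled block raises that flag: the new entries make texture/sampler descriptor (tic/tsc) writes raise Descriptors, while rt_control writes raise both RenderTargets and the new RenderTargetControl. A rough sketch of the mechanism; the FillBlock body and the write-path lines are assumptions, not the actual implementation:

    // Assumed semantics: tag a contiguous register range with a flag value.
    void FillBlock(Table& table, std::size_t begin, std::size_t num, u8 flag) {
        std::fill_n(table.begin() + begin, num, flag);
    }
    // On each register write the engine then marks up to two flags dirty:
    //   flags[tables[0][method]] = true;
    //   flags[tables[1][method]] = true;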
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 3f6c1d83a..875527ddd 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h | |||
| @@ -16,7 +16,10 @@ namespace VideoCommon::Dirty { | |||
| 16 | enum : u8 { | 16 | enum : u8 { |
| 17 | NullEntry = 0, | 17 | NullEntry = 0, |
| 18 | 18 | ||
| 19 | Descriptors, | ||
| 20 | |||
| 19 | RenderTargets, | 21 | RenderTargets, |
| 22 | RenderTargetControl, | ||
| 20 | ColorBuffer0, | 23 | ColorBuffer0, |
| 21 | ColorBuffer1, | 24 | ColorBuffer1, |
| 22 | ColorBuffer2, | 25 | ColorBuffer2, |
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 4293d676c..a01d334ad 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -10,7 +10,11 @@ | |||
| 10 | 10 | ||
| 11 | namespace Tegra::Engines { | 11 | namespace Tegra::Engines { |
| 12 | 12 | ||
| 13 | Fermi2D::Fermi2D() = default; | 13 | Fermi2D::Fermi2D() { |
| 14 | // Nvidia's OpenGL driver seems to assume these values | ||
| 15 | regs.src.depth = 1; | ||
| 16 | regs.dst.depth = 1; | ||
| 17 | } | ||
| 14 | 18 | ||
| 15 | Fermi2D::~Fermi2D() = default; | 19 | Fermi2D::~Fermi2D() = default; |
| 16 | 20 | ||
| @@ -21,78 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { | |||
| 21 | void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | 25 | void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { |
| 22 | ASSERT_MSG(method < Regs::NUM_REGS, | 26 | ASSERT_MSG(method < Regs::NUM_REGS, |
| 23 | "Invalid Fermi2D register, increase the size of the Regs structure"); | 27 | "Invalid Fermi2D register, increase the size of the Regs structure"); |
| 24 | |||
| 25 | regs.reg_array[method] = method_argument; | 28 | regs.reg_array[method] = method_argument; |
| 26 | 29 | ||
| 27 | switch (method) { | 30 | if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) { |
| 28 | // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit, | 31 | Blit(); |
| 29 | // so trigger on the second 32-bit write. | ||
| 30 | case FERMI2D_REG_INDEX(blit_src_y) + 1: { | ||
| 31 | HandleSurfaceCopy(); | ||
| 32 | break; | ||
| 33 | } | ||
| 34 | } | 32 | } |
| 35 | } | 33 | } |
| 36 | 34 | ||
| 37 | void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { | 35 | void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { |
| 38 | for (std::size_t i = 0; i < amount; i++) { | 36 | for (u32 i = 0; i < amount; ++i) { |
| 39 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | 37 | CallMethod(method, base_start[i], methods_pending - i <= 1); |
| 40 | } | 38 | } |
| 41 | } | 39 | } |
| 42 | 40 | ||
| 43 | static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) { | 41 | void Fermi2D::Blit() { |
| 44 | const u32 line_a = src_2 - src_1; | 42 | LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", |
| 45 | const u32 line_b = dst_2 - dst_1; | 43 | regs.src.Address(), regs.dst.Address()); |
| 46 | const u32 excess = std::max<s32>(0, line_a - src_line + src_1); | ||
| 47 | return {line_b - (excess * line_b) / line_a, excess}; | ||
| 48 | } | ||
| 49 | |||
| 50 | void Fermi2D::HandleSurfaceCopy() { | ||
| 51 | LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation); | ||
| 52 | 44 | ||
| 53 | // TODO(Subv): Only raw copies are implemented. | 45 | UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy"); |
| 54 | ASSERT(regs.operation == Operation::SrcCopy); | 46 | UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero"); |
| 47 | UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero"); | ||
| 48 | UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one"); | ||
| 49 | UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); | ||
| 55 | 50 | ||
| 56 | const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)}; | 51 | const auto& args = regs.pixels_from_memory; |
| 57 | const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)}; | 52 | const Config config{ |
| 58 | u32 src_blit_x2, src_blit_y2; | ||
| 59 | if (regs.blit_control.origin == Origin::Corner) { | ||
| 60 | src_blit_x2 = | ||
| 61 | static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32); | ||
| 62 | src_blit_y2 = | ||
| 63 | static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32); | ||
| 64 | } else { | ||
| 65 | src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width); | ||
| 66 | src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height); | ||
| 67 | } | ||
| 68 | u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width; | ||
| 69 | u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height; | ||
| 70 | const auto [new_dst_w, src_excess_x] = | ||
| 71 | DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width); | ||
| 72 | const auto [new_dst_h, src_excess_y] = | ||
| 73 | DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height); | ||
| 74 | dst_blit_x2 = new_dst_w + regs.blit_dst_x; | ||
| 75 | src_blit_x2 = src_blit_x2 - src_excess_x; | ||
| 76 | dst_blit_y2 = new_dst_h + regs.blit_dst_y; | ||
| 77 | src_blit_y2 = src_blit_y2 - src_excess_y; | ||
| 78 | const auto [new_src_w, dst_excess_x] = | ||
| 79 | DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width); | ||
| 80 | const auto [new_src_h, dst_excess_y] = | ||
| 81 | DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height); | ||
| 82 | src_blit_x2 = new_src_w + src_blit_x1; | ||
| 83 | dst_blit_x2 = dst_blit_x2 - dst_excess_x; | ||
| 84 | src_blit_y2 = new_src_h + src_blit_y1; | ||
| 85 | dst_blit_y2 = dst_blit_y2 - dst_excess_y; | ||
| 86 | const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; | ||
| 87 | const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2, | ||
| 88 | dst_blit_y2}; | ||
| 89 | const Config copy_config{ | ||
| 90 | .operation = regs.operation, | 53 | .operation = regs.operation, |
| 91 | .filter = regs.blit_control.filter, | 54 | .filter = args.sample_mode.filter, |
| 92 | .src_rect = src_rect, | 55 | .dst_x0 = args.dst_x0, |
| 93 | .dst_rect = dst_rect, | 56 | .dst_y0 = args.dst_y0, |
| 57 | .dst_x1 = args.dst_x0 + args.dst_width, | ||
| 58 | .dst_y1 = args.dst_y0 + args.dst_height, | ||
| 59 | .src_x0 = static_cast<s32>(args.src_x0 >> 32), | ||
| 60 | .src_y0 = static_cast<s32>(args.src_y0 >> 32), | ||
| 61 | .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32), | ||
| 62 | .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32), | ||
| 94 | }; | 63 | }; |
| 95 | if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { | 64 | if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) { |
| 96 | UNIMPLEMENTED(); | 65 | UNIMPLEMENTED(); |
| 97 | } | 66 | } |
| 98 | } | 67 | } |
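[Editor's note] The blit registers store source coordinates and deltas in 32.32 fixed point, which is why Blit() shifts right by 32 after scaling. A worked example with hypothetical numbers:

    // 32.32 fixed point: the high 32 bits hold the integer part.
    const s64 du_dx = s64{2} << 32;  // scale factor 2.0 (downscale 2:1)
    const s64 src_x0 = 0;
    const s32 dst_width = 100;
    const s32 src_x1 = static_cast<s32>((du_dx * dst_width + src_x0) >> 32);
    // src_x1 == 200: a 100-pixel destination span samples a 200-pixel source span.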
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 0909709ec..81522988e 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -53,8 +53,8 @@ public: | |||
| 53 | }; | 53 | }; |
| 54 | 54 | ||
| 55 | enum class Filter : u32 { | 55 | enum class Filter : u32 { |
| 56 | PointSample = 0, // Nearest | 56 | Point = 0, |
| 57 | Linear = 1, | 57 | Bilinear = 1, |
| 58 | }; | 58 | }; |
| 59 | 59 | ||
| 60 | enum class Operation : u32 { | 60 | enum class Operation : u32 { |
| @@ -67,88 +67,235 @@ public: | |||
| 67 | BlendPremult = 6, | 67 | BlendPremult = 6, |
| 68 | }; | 68 | }; |
| 69 | 69 | ||
| 70 | struct Regs { | 70 | enum class MemoryLayout : u32 { |
| 71 | static constexpr std::size_t NUM_REGS = 0x258; | 71 | BlockLinear = 0, |
| 72 | Pitch = 1, | ||
| 73 | }; | ||
| 72 | 74 | ||
| 73 | struct Surface { | 75 | enum class CpuIndexWrap : u32 { |
| 74 | RenderTargetFormat format; | 76 | Wrap = 0, |
| 75 | BitField<0, 1, u32> linear; | 77 | NoWrap = 1, |
| 76 | union { | 78 | }; |
| 77 | BitField<0, 4, u32> block_width; | ||
| 78 | BitField<4, 4, u32> block_height; | ||
| 79 | BitField<8, 4, u32> block_depth; | ||
| 80 | }; | ||
| 81 | u32 depth; | ||
| 82 | u32 layer; | ||
| 83 | u32 pitch; | ||
| 84 | u32 width; | ||
| 85 | u32 height; | ||
| 86 | u32 address_high; | ||
| 87 | u32 address_low; | ||
| 88 | |||
| 89 | GPUVAddr Address() const { | ||
| 90 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 91 | address_low); | ||
| 92 | } | ||
| 93 | |||
| 94 | u32 BlockWidth() const { | ||
| 95 | return block_width.Value(); | ||
| 96 | } | ||
| 97 | |||
| 98 | u32 BlockHeight() const { | ||
| 99 | return block_height.Value(); | ||
| 100 | } | ||
| 101 | |||
| 102 | u32 BlockDepth() const { | ||
| 103 | return block_depth.Value(); | ||
| 104 | } | ||
| 105 | }; | ||
| 106 | static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); | ||
| 107 | 79 | ||
| 80 | struct Surface { | ||
| 81 | RenderTargetFormat format; | ||
| 82 | MemoryLayout linear; | ||
| 108 | union { | 83 | union { |
| 109 | struct { | 84 | BitField<0, 4, u32> block_width; |
| 110 | INSERT_UNION_PADDING_WORDS(0x80); | 85 | BitField<4, 4, u32> block_height; |
| 86 | BitField<8, 4, u32> block_depth; | ||
| 87 | }; | ||
| 88 | u32 depth; | ||
| 89 | u32 layer; | ||
| 90 | u32 pitch; | ||
| 91 | u32 width; | ||
| 92 | u32 height; | ||
| 93 | u32 addr_upper; | ||
| 94 | u32 addr_lower; | ||
| 95 | |||
| 96 | [[nodiscard]] constexpr GPUVAddr Address() const noexcept { | ||
| 97 | return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower); | ||
| 98 | } | ||
| 99 | }; | ||
| 100 | static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); | ||
| 111 | 101 | ||
| 112 | Surface dst; | 102 | enum class SectorPromotion : u32 { |
| 103 | NoPromotion = 0, | ||
| 104 | PromoteTo2V = 1, | ||
| 105 | PromoteTo2H = 2, | ||
| 106 | PromoteTo4 = 3, | ||
| 107 | }; | ||
| 108 | |||
| 109 | enum class NumTpcs : u32 { | ||
| 110 | All = 0, | ||
| 111 | One = 1, | ||
| 112 | }; | ||
| 113 | 113 | ||
| 114 | INSERT_UNION_PADDING_WORDS(2); | 114 | enum class RenderEnableMode : u32 { |
| 115 | False = 0, | ||
| 116 | True = 1, | ||
| 117 | Conditional = 2, | ||
| 118 | RenderIfEqual = 3, | ||
| 119 | RenderIfNotEqual = 4, | ||
| 120 | }; | ||
| 115 | 121 | ||
| 116 | Surface src; | 122 | enum class ColorKeyFormat : u32 { |
| 123 | A16R56G6B5 = 0, | ||
| 124 | A1R5G55B5 = 1, | ||
| 125 | A8R8G8B8 = 2, | ||
| 126 | A2R10G10B10 = 3, | ||
| 127 | Y8 = 4, | ||
| 128 | Y16 = 5, | ||
| 129 | Y32 = 6, | ||
| 130 | }; | ||
| 117 | 131 | ||
| 118 | INSERT_UNION_PADDING_WORDS(0x15); | 132 | union Beta4 { |
| 133 | BitField<0, 8, u32> b; | ||
| 134 | BitField<8, 8, u32> g; | ||
| 135 | BitField<16, 8, u32> r; | ||
| 136 | BitField<24, 8, u32> a; | ||
| 137 | }; | ||
| 119 | 138 | ||
| 120 | Operation operation; | 139 | struct Point { |
| 140 | u32 x; | ||
| 141 | u32 y; | ||
| 142 | }; | ||
| 121 | 143 | ||
| 122 | INSERT_UNION_PADDING_WORDS(0x177); | 144 | enum class PatternSelect : u32 { |
| 145 | MonoChrome8x8 = 0, | ||
| 146 | MonoChrome64x1 = 1, | ||
| 147 | MonoChrome1x64 = 2, | ||
| 148 | Color = 3, | ||
| 149 | }; | ||
| 123 | 150 | ||
| 151 | enum class NotifyType : u32 { | ||
| 152 | WriteOnly = 0, | ||
| 153 | WriteThenAwaken = 1, | ||
| 154 | }; | ||
| 155 | |||
| 156 | enum class MonochromePatternColorFormat : u32 { | ||
| 157 | A8X8R8G6B5 = 0, | ||
| 158 | A1R5G5B5 = 1, | ||
| 159 | A8R8G8B8 = 2, | ||
| 160 | A8Y8 = 3, | ||
| 161 | A8X8Y16 = 4, | ||
| 162 | Y32 = 5, | ||
| 163 | }; | ||
| 164 | |||
| 165 | enum class MonochromePatternFormat : u32 { | ||
| 166 | CGA6_M1 = 0, | ||
| 167 | LE_M1 = 1, | ||
| 168 | }; | ||
| 169 | |||
| 170 | union Regs { | ||
| 171 | static constexpr std::size_t NUM_REGS = 0x258; | ||
| 172 | struct { | ||
| 173 | u32 object; | ||
| 174 | INSERT_UNION_PADDING_WORDS(0x3F); | ||
| 175 | u32 no_operation; | ||
| 176 | NotifyType notify; | ||
| 177 | INSERT_UNION_PADDING_WORDS(0x2); | ||
| 178 | u32 wait_for_idle; | ||
| 179 | INSERT_UNION_PADDING_WORDS(0xB); | ||
| 180 | u32 pm_trigger; | ||
| 181 | INSERT_UNION_PADDING_WORDS(0xF); | ||
| 182 | u32 context_dma_notify; | ||
| 183 | u32 dst_context_dma; | ||
| 184 | u32 src_context_dma; | ||
| 185 | u32 semaphore_context_dma; | ||
| 186 | INSERT_UNION_PADDING_WORDS(0x1C); | ||
| 187 | Surface dst; | ||
| 188 | CpuIndexWrap pixels_from_cpu_index_wrap; | ||
| 189 | u32 kind2d_check_enable; | ||
| 190 | Surface src; | ||
| 191 | SectorPromotion pixels_from_memory_sector_promotion; | ||
| 192 | INSERT_UNION_PADDING_WORDS(0x1); | ||
| 193 | NumTpcs num_tpcs; | ||
| 194 | u32 render_enable_addr_upper; | ||
| 195 | u32 render_enable_addr_lower; | ||
| 196 | RenderEnableMode render_enable_mode; | ||
| 197 | INSERT_UNION_PADDING_WORDS(0x4); | ||
| 198 | u32 clip_x0; | ||
| 199 | u32 clip_y0; | ||
| 200 | u32 clip_width; | ||
| 201 | u32 clip_height; | ||
| 202 | BitField<0, 1, u32> clip_enable; | ||
| 203 | BitField<0, 3, ColorKeyFormat> color_key_format; | ||
| 204 | u32 color_key; | ||
| 205 | BitField<0, 1, u32> color_key_enable; | ||
| 206 | BitField<0, 8, u32> rop; | ||
| 207 | u32 beta1; | ||
| 208 | Beta4 beta4; | ||
| 209 | Operation operation; | ||
| 210 | union { | ||
| 211 | BitField<0, 6, u32> x; | ||
| 212 | BitField<8, 6, u32> y; | ||
| 213 | } pattern_offset; | ||
| 214 | BitField<0, 2, PatternSelect> pattern_select; | ||
| 215 | INSERT_UNION_PADDING_WORDS(0xC); | ||
| 216 | struct { | ||
| 217 | BitField<0, 3, MonochromePatternColorFormat> color_format; | ||
| 218 | BitField<0, 1, MonochromePatternFormat> format; | ||
| 219 | u32 color0; | ||
| 220 | u32 color1; | ||
| 221 | u32 pattern0; | ||
| 222 | u32 pattern1; | ||
| 223 | } monochrome_pattern; | ||
| 224 | struct { | ||
| 225 | std::array<u32, 0x40> X8R8G8B8; | ||
| 226 | std::array<u32, 0x20> R5G6B5; | ||
| 227 | std::array<u32, 0x20> X1R5G5B5; | ||
| 228 | std::array<u32, 0x10> Y8; | ||
| 229 | } color_pattern; | ||
| 230 | INSERT_UNION_PADDING_WORDS(0x10); | ||
| 231 | struct { | ||
| 232 | u32 prim_mode; | ||
| 233 | u32 prim_color_format; | ||
| 234 | u32 prim_color; | ||
| 235 | u32 line_tie_break_bits; | ||
| 236 | INSERT_UNION_PADDING_WORDS(0x14); | ||
| 237 | u32 prim_point_xy; | ||
| 238 | INSERT_UNION_PADDING_WORDS(0x7); | ||
| 239 | std::array<Point, 0x40> prim_point; | ||
| 240 | } render_solid; | ||
| 241 | struct { | ||
| 242 | u32 data_type; | ||
| 243 | u32 color_format; | ||
| 244 | u32 index_format; | ||
| 245 | u32 mono_format; | ||
| 246 | u32 wrap; | ||
| 247 | u32 color0; | ||
| 248 | u32 color1; | ||
| 249 | u32 mono_opacity; | ||
| 250 | INSERT_UNION_PADDING_WORDS(0x6); | ||
| 251 | u32 src_width; | ||
| 252 | u32 src_height; | ||
| 253 | u32 dx_du_frac; | ||
| 254 | u32 dx_du_int; | ||
| 255 | u32 dy_dv_frac; | ||
| 256 | u32 dy_dv_int; | ||
| 257 | u32 dst_x0_frac; | ||
| 258 | u32 dst_x0_int; | ||
| 259 | u32 dst_y0_frac; | ||
| 260 | u32 dst_y0_int; | ||
| 261 | u32 data; | ||
| 262 | } pixels_from_cpu; | ||
| 263 | INSERT_UNION_PADDING_WORDS(0x3); | ||
| 264 | u32 big_endian_control; | ||
| 265 | INSERT_UNION_PADDING_WORDS(0x3); | ||
| 266 | struct { | ||
| 267 | BitField<0, 3, u32> block_shape; | ||
| 268 | BitField<0, 5, u32> corral_size; | ||
| 269 | BitField<0, 1, u32> safe_overlap; | ||
| 124 | union { | 270 | union { |
| 125 | u32 raw; | ||
| 126 | BitField<0, 1, Origin> origin; | 271 | BitField<0, 1, Origin> origin; |
| 127 | BitField<4, 1, Filter> filter; | 272 | BitField<4, 1, Filter> filter; |
| 128 | } blit_control; | 273 | } sample_mode; |
| 129 | |||
| 130 | INSERT_UNION_PADDING_WORDS(0x8); | 274 | INSERT_UNION_PADDING_WORDS(0x8); |
| 131 | 275 | s32 dst_x0; | |
| 132 | u32 blit_dst_x; | 276 | s32 dst_y0; |
| 133 | u32 blit_dst_y; | 277 | s32 dst_width; |
| 134 | u32 blit_dst_width; | 278 | s32 dst_height; |
| 135 | u32 blit_dst_height; | 279 | s64 du_dx; |
| 136 | u64 blit_du_dx; | 280 | s64 dv_dy; |
| 137 | u64 blit_dv_dy; | 281 | s64 src_x0; |
| 138 | u64 blit_src_x; | 282 | s64 src_y0; |
| 139 | u64 blit_src_y; | 283 | } pixels_from_memory; |
| 140 | |||
| 141 | INSERT_UNION_PADDING_WORDS(0x21); | ||
| 142 | }; | ||
| 143 | std::array<u32, NUM_REGS> reg_array; | ||
| 144 | }; | 284 | }; |
| 285 | std::array<u32, NUM_REGS> reg_array; | ||
| 145 | } regs{}; | 286 | } regs{}; |
| 146 | 287 | ||
| 147 | struct Config { | 288 | struct Config { |
| 148 | Operation operation{}; | 289 | Operation operation; |
| 149 | Filter filter{}; | 290 | Filter filter; |
| 150 | Common::Rectangle<u32> src_rect; | 291 | s32 dst_x0; |
| 151 | Common::Rectangle<u32> dst_rect; | 292 | s32 dst_y0; |
| 293 | s32 dst_x1; | ||
| 294 | s32 dst_y1; | ||
| 295 | s32 src_x0; | ||
| 296 | s32 src_y0; | ||
| 297 | s32 src_x1; | ||
| 298 | s32 src_y1; | ||
| 152 | }; | 299 | }; |
| 153 | 300 | ||
| 154 | private: | 301 | private: |
| @@ -156,25 +303,49 @@ private: | |||
| 156 | 303 | ||
| 157 | /// Performs the copy from the source surface to the destination surface as configured in the | 304 | /// Performs the copy from the source surface to the destination surface as configured in the |
| 158 | /// registers. | 305 | /// registers. |
| 159 | void HandleSurfaceCopy(); | 306 | void Blit(); |
| 160 | }; | 307 | }; |
| 161 | 308 | ||
| 162 | #define ASSERT_REG_POSITION(field_name, position) \ | 309 | #define ASSERT_REG_POSITION(field_name, position) \ |
| 163 | static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \ | 310 | static_assert(offsetof(Fermi2D::Regs, field_name) == position, \ |
| 164 | "Field " #field_name " has invalid position") | 311 | "Field " #field_name " has invalid position") |
| 165 | 312 | ||
| 166 | ASSERT_REG_POSITION(dst, 0x80); | 313 | ASSERT_REG_POSITION(object, 0x0); |
| 167 | ASSERT_REG_POSITION(src, 0x8C); | 314 | ASSERT_REG_POSITION(no_operation, 0x100); |
| 168 | ASSERT_REG_POSITION(operation, 0xAB); | 315 | ASSERT_REG_POSITION(notify, 0x104); |
| 169 | ASSERT_REG_POSITION(blit_control, 0x223); | 316 | ASSERT_REG_POSITION(wait_for_idle, 0x110); |
| 170 | ASSERT_REG_POSITION(blit_dst_x, 0x22c); | 317 | ASSERT_REG_POSITION(pm_trigger, 0x140); |
| 171 | ASSERT_REG_POSITION(blit_dst_y, 0x22d); | 318 | ASSERT_REG_POSITION(context_dma_notify, 0x180); |
| 172 | ASSERT_REG_POSITION(blit_dst_width, 0x22e); | 319 | ASSERT_REG_POSITION(dst_context_dma, 0x184); |
| 173 | ASSERT_REG_POSITION(blit_dst_height, 0x22f); | 320 | ASSERT_REG_POSITION(src_context_dma, 0x188); |
| 174 | ASSERT_REG_POSITION(blit_du_dx, 0x230); | 321 | ASSERT_REG_POSITION(semaphore_context_dma, 0x18C); |
| 175 | ASSERT_REG_POSITION(blit_dv_dy, 0x232); | 322 | ASSERT_REG_POSITION(dst, 0x200); |
| 176 | ASSERT_REG_POSITION(blit_src_x, 0x234); | 323 | ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228); |
| 177 | ASSERT_REG_POSITION(blit_src_y, 0x236); | 324 | ASSERT_REG_POSITION(kind2d_check_enable, 0x22C); |
| 325 | ASSERT_REG_POSITION(src, 0x230); | ||
| 326 | ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258); | ||
| 327 | ASSERT_REG_POSITION(num_tpcs, 0x260); | ||
| 328 | ASSERT_REG_POSITION(render_enable_addr_upper, 0x264); | ||
| 329 | ASSERT_REG_POSITION(render_enable_addr_lower, 0x268); | ||
| 330 | ASSERT_REG_POSITION(clip_x0, 0x280); | ||
| 331 | ASSERT_REG_POSITION(clip_y0, 0x284); | ||
| 332 | ASSERT_REG_POSITION(clip_width, 0x288); | ||
| 333 | ASSERT_REG_POSITION(clip_height, 0x28c); | ||
| 334 | ASSERT_REG_POSITION(clip_enable, 0x290); | ||
| 335 | ASSERT_REG_POSITION(color_key_format, 0x294); | ||
| 336 | ASSERT_REG_POSITION(color_key, 0x298); | ||
| 337 | ASSERT_REG_POSITION(rop, 0x2A0); | ||
| 338 | ASSERT_REG_POSITION(beta1, 0x2A4); | ||
| 339 | ASSERT_REG_POSITION(beta4, 0x2A8); | ||
| 340 | ASSERT_REG_POSITION(operation, 0x2AC); | ||
| 341 | ASSERT_REG_POSITION(pattern_offset, 0x2B0); | ||
| 342 | ASSERT_REG_POSITION(pattern_select, 0x2B4); | ||
| 343 | ASSERT_REG_POSITION(monochrome_pattern, 0x2E8); | ||
| 344 | ASSERT_REG_POSITION(color_pattern, 0x300); | ||
| 345 | ASSERT_REG_POSITION(render_solid, 0x580); | ||
| 346 | ASSERT_REG_POSITION(pixels_from_cpu, 0x800); | ||
| 347 | ASSERT_REG_POSITION(big_endian_control, 0x870); | ||
| 348 | ASSERT_REG_POSITION(pixels_from_memory, 0x880); | ||
| 178 | 349 | ||
| 179 | #undef ASSERT_REG_POSITION | 350 | #undef ASSERT_REG_POSITION |
| 180 | 351 | ||
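Note on the ASSERT_REG_POSITION rewrite above: the old macro took a 32-bit word index and multiplied it by 4 internally, while the new macro compares offsetof against raw byte offsets. That is why every asserted position changed, e.g. dst moves from 0x80 (words) to 0x200 (bytes). A minimal sketch of the two conventions side by side:

// Old convention: position is an index into reg_array, scaled inside the macro.
static_assert(offsetof(Fermi2D::Regs, dst) == 0x80 * 4, "dst word index");
// New convention: position is the byte offset itself (0x80 * 4 == 0x200).
static_assert(offsetof(Fermi2D::Regs, dst) == 0x200, "dst byte offset");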
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 898370739..ba387506e 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun | |||
| 58 | } | 58 | } |
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { | ||
| 62 | const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); | ||
| 63 | ASSERT(cbuf_mask[regs.tex_cb_index]); | ||
| 64 | |||
| 65 | const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index]; | ||
| 66 | ASSERT(texinfo.Address() != 0); | ||
| 67 | |||
| 68 | const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle); | ||
| 69 | ASSERT(address < texinfo.Address() + texinfo.size); | ||
| 70 | |||
| 71 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)}; | ||
| 72 | return GetTextureInfo(tex_handle); | ||
| 73 | } | ||
| 74 | |||
| 75 | Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const { | ||
| 76 | return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; | ||
| 77 | } | ||
| 78 | |||
| 79 | u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { | 61 | u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { |
| 80 | ASSERT(stage == ShaderType::Compute); | 62 | ASSERT(stage == ShaderType::Compute); |
| 81 | const auto& buffer = launch_description.const_buffer_config[const_buffer]; | 63 | const auto& buffer = launch_description.const_buffer_config[const_buffer]; |
| @@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con | |||
| 98 | 80 | ||
| 99 | SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { | 81 | SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { |
| 100 | const Texture::TextureHandle tex_handle{handle}; | 82 | const Texture::TextureHandle tex_handle{handle}; |
| 101 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); | 83 | const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); |
| 102 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); | 84 | const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); |
| 103 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | 85 | |
| 86 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); | ||
| 87 | result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); | ||
| 104 | return result; | 88 | return result; |
| 105 | } | 89 | } |
| 106 | 90 | ||
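The AccessSampler hunk above drops the FullTextureInfo round trip and fetches the TIC and TSC entries directly from the handle's indices. For context, Texture::TextureHandle is a packed u32 whose authoritative definition lives in video_core/textures/texture.h; the layout below is a sketch and the field widths are an assumption, not taken from this diff:

// Hypothetical layout sketch of the packed handle unpacked by AccessSampler.
union TextureHandle {
    u32 raw;
    BitField<0, 20, u32> tic_id;  // index of the texture image control (TIC) entry
    BitField<20, 12, u32> tsc_id; // index of the texture sampler control (TSC) entry
};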
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7f2500aab..51a041202 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -209,11 +209,6 @@ public: | |||
| 209 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | 209 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 210 | u32 methods_pending) override; | 210 | u32 methods_pending) override; |
| 211 | 211 | ||
| 212 | Texture::FullTextureInfo GetTexture(std::size_t offset) const; | ||
| 213 | |||
| 214 | /// Given a texture handle, returns the TSC and TIC entries. | ||
| 215 | Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; | ||
| 216 | |||
| 217 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; | 212 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; |
| 218 | 213 | ||
| 219 | SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; | 214 | SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 761962ed0..9be651e24 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -2,7 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cinttypes> | ||
| 6 | #include <cstring> | 5 | #include <cstring> |
| 7 | #include <optional> | 6 | #include <optional> |
| 8 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| @@ -227,6 +226,10 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume | |||
| 227 | OnMemoryWrite(); | 226 | OnMemoryWrite(); |
| 228 | } | 227 | } |
| 229 | return; | 228 | return; |
| 229 | case MAXWELL3D_REG_INDEX(fragment_barrier): | ||
| 230 | return rasterizer->FragmentBarrier(); | ||
| 231 | case MAXWELL3D_REG_INDEX(tiled_cache_barrier): | ||
| 232 | return rasterizer->TiledCacheBarrier(); | ||
| 230 | } | 233 | } |
| 231 | } | 234 | } |
| 232 | 235 | ||
| @@ -639,7 +642,7 @@ void Maxwell3D::FinishCBData() { | |||
| 639 | } | 642 | } |
| 640 | 643 | ||
| 641 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | 644 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { |
| 642 | const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; | 645 | const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)}; |
| 643 | 646 | ||
| 644 | Texture::TICEntry tic_entry; | 647 | Texture::TICEntry tic_entry; |
| 645 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); | 648 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); |
| @@ -648,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | |||
| 648 | } | 651 | } |
| 649 | 652 | ||
| 650 | Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { | 653 | Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { |
| 651 | const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; | 654 | const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)}; |
| 652 | 655 | ||
| 653 | Texture::TSCEntry tsc_entry; | 656 | Texture::TSCEntry tsc_entry; |
| 654 | memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); | 657 | memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); |
| 655 | return tsc_entry; | 658 | return tsc_entry; |
| 656 | } | 659 | } |
| 657 | 660 | ||
| 658 | Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const { | ||
| 659 | return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; | ||
| 660 | } | ||
| 661 | |||
| 662 | Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const { | ||
| 663 | const auto stage_index = static_cast<std::size_t>(stage); | ||
| 664 | const auto& shader = state.shader_stages[stage_index]; | ||
| 665 | const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; | ||
| 666 | ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); | ||
| 667 | |||
| 668 | const GPUVAddr tex_info_address = | ||
| 669 | tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); | ||
| 670 | |||
| 671 | ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); | ||
| 672 | |||
| 673 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | ||
| 674 | |||
| 675 | return GetTextureInfo(tex_handle); | ||
| 676 | } | ||
| 677 | |||
| 678 | u32 Maxwell3D::GetRegisterValue(u32 method) const { | 661 | u32 Maxwell3D::GetRegisterValue(u32 method) const { |
| 679 | ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); | 662 | ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); |
| 680 | return regs.reg_array[method]; | 663 | return regs.reg_array[method]; |
| 681 | } | 664 | } |
| 682 | 665 | ||
| 683 | void Maxwell3D::ProcessClearBuffers() { | 666 | void Maxwell3D::ProcessClearBuffers() { |
| 684 | ASSERT(regs.clear_buffers.R == regs.clear_buffers.G && | ||
| 685 | regs.clear_buffers.R == regs.clear_buffers.B && | ||
| 686 | regs.clear_buffers.R == regs.clear_buffers.A); | ||
| 687 | |||
| 688 | rasterizer->Clear(); | 667 | rasterizer->Clear(); |
| 689 | } | 668 | } |
| 690 | 669 | ||
| @@ -692,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse | |||
| 692 | ASSERT(stage != ShaderType::Compute); | 671 | ASSERT(stage != ShaderType::Compute); |
| 693 | const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; | 672 | const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; |
| 694 | const auto& buffer = shader_stage.const_buffers[const_buffer]; | 673 | const auto& buffer = shader_stage.const_buffers[const_buffer]; |
| 695 | u32 result; | 674 | return memory_manager.Read<u32>(buffer.address + offset); |
| 696 | std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32)); | ||
| 697 | return result; | ||
| 698 | } | 675 | } |
| 699 | 676 | ||
| 700 | SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { | 677 | SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { |
| @@ -712,9 +689,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b | |||
| 712 | 689 | ||
| 713 | SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { | 690 | SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { |
| 714 | const Texture::TextureHandle tex_handle{handle}; | 691 | const Texture::TextureHandle tex_handle{handle}; |
| 715 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); | 692 | const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); |
| 716 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); | 693 | const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); |
| 717 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | 694 | |
| 695 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); | ||
| 696 | result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); | ||
| 718 | return result; | 697 | return result; |
| 719 | } | 698 | } |
| 720 | 699 | ||
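One behavioral note on the AccessConstBuffer32 hunk above: the removed code memcpy'd through memory_manager.GetPointer(), which presumes the const buffer address maps to host-contiguous memory, whereas Read<u32> routes the access through the memory manager. A sketch of the semantics being relied on, assuming Read<T> is built on the ReadBlockUnsafe primitive seen elsewhere in this diff (this is not the actual MemoryManager implementation):

// Sketch only: translate the GPU virtual address through the page table
// instead of dereferencing a raw host pointer.
template <typename T>
T MemoryManager::Read(GPUVAddr gpu_addr) {
    T value{};
    ReadBlockUnsafe(gpu_addr, &value, sizeof(T)); // page-table-aware copy
    return value;
}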
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 564acbc53..bf9e07c9b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -438,16 +438,6 @@ public: | |||
| 438 | DecrWrapOGL = 0x8508, | 438 | DecrWrapOGL = 0x8508, |
| 439 | }; | 439 | }; |
| 440 | 440 | ||
| 441 | enum class MemoryLayout : u32 { | ||
| 442 | Linear = 0, | ||
| 443 | BlockLinear = 1, | ||
| 444 | }; | ||
| 445 | |||
| 446 | enum class InvMemoryLayout : u32 { | ||
| 447 | BlockLinear = 0, | ||
| 448 | Linear = 1, | ||
| 449 | }; | ||
| 450 | |||
| 451 | enum class CounterReset : u32 { | 441 | enum class CounterReset : u32 { |
| 452 | SampleCnt = 0x01, | 442 | SampleCnt = 0x01, |
| 453 | Unk02 = 0x02, | 443 | Unk02 = 0x02, |
| @@ -589,21 +579,31 @@ public: | |||
| 589 | NegativeW = 7, | 579 | NegativeW = 7, |
| 590 | }; | 580 | }; |
| 591 | 581 | ||
| 582 | enum class SamplerIndex : u32 { | ||
| 583 | Independently = 0, | ||
| 584 | ViaHeaderIndex = 1, | ||
| 585 | }; | ||
| 586 | |||
| 587 | struct TileMode { | ||
| 588 | union { | ||
| 589 | BitField<0, 4, u32> block_width; | ||
| 590 | BitField<4, 4, u32> block_height; | ||
| 591 | BitField<8, 4, u32> block_depth; | ||
| 592 | BitField<12, 1, u32> is_pitch_linear; | ||
| 593 | BitField<16, 1, u32> is_3d; | ||
| 594 | }; | ||
| 595 | }; | ||
| 596 | static_assert(sizeof(TileMode) == 4); | ||
| 597 | |||
| 592 | struct RenderTargetConfig { | 598 | struct RenderTargetConfig { |
| 593 | u32 address_high; | 599 | u32 address_high; |
| 594 | u32 address_low; | 600 | u32 address_low; |
| 595 | u32 width; | 601 | u32 width; |
| 596 | u32 height; | 602 | u32 height; |
| 597 | Tegra::RenderTargetFormat format; | 603 | Tegra::RenderTargetFormat format; |
| 604 | TileMode tile_mode; | ||
| 598 | union { | 605 | union { |
| 599 | BitField<0, 3, u32> block_width; | 606 | BitField<0, 16, u32> depth; |
| 600 | BitField<4, 3, u32> block_height; | ||
| 601 | BitField<8, 3, u32> block_depth; | ||
| 602 | BitField<12, 1, InvMemoryLayout> type; | ||
| 603 | BitField<16, 1, u32> is_3d; | ||
| 604 | } memory_layout; | ||
| 605 | union { | ||
| 606 | BitField<0, 16, u32> layers; | ||
| 607 | BitField<16, 1, u32> volume; | 607 | BitField<16, 1, u32> volume; |
| 608 | }; | 608 | }; |
| 609 | u32 layer_stride; | 609 | u32 layer_stride; |
| @@ -832,7 +832,11 @@ public: | |||
| 832 | 832 | ||
| 833 | u32 patch_vertices; | 833 | u32 patch_vertices; |
| 834 | 834 | ||
| 835 | INSERT_UNION_PADDING_WORDS(0xC); | 835 | INSERT_UNION_PADDING_WORDS(0x4); |
| 836 | |||
| 837 | u32 fragment_barrier; | ||
| 838 | |||
| 839 | INSERT_UNION_PADDING_WORDS(0x7); | ||
| 836 | 840 | ||
| 837 | std::array<ScissorTest, NumViewports> scissor_test; | 841 | std::array<ScissorTest, NumViewports> scissor_test; |
| 838 | 842 | ||
| @@ -842,7 +846,15 @@ public: | |||
| 842 | u32 stencil_back_mask; | 846 | u32 stencil_back_mask; |
| 843 | u32 stencil_back_func_mask; | 847 | u32 stencil_back_func_mask; |
| 844 | 848 | ||
| 845 | INSERT_UNION_PADDING_WORDS(0xC); | 849 | INSERT_UNION_PADDING_WORDS(0x5); |
| 850 | |||
| 851 | u32 invalidate_texture_data_cache; | ||
| 852 | |||
| 853 | INSERT_UNION_PADDING_WORDS(0x1); | ||
| 854 | |||
| 855 | u32 tiled_cache_barrier; | ||
| 856 | |||
| 857 | INSERT_UNION_PADDING_WORDS(0x4); | ||
| 846 | 858 | ||
| 847 | u32 color_mask_common; | 859 | u32 color_mask_common; |
| 848 | 860 | ||
| @@ -866,12 +878,7 @@ public: | |||
| 866 | u32 address_high; | 878 | u32 address_high; |
| 867 | u32 address_low; | 879 | u32 address_low; |
| 868 | Tegra::DepthFormat format; | 880 | Tegra::DepthFormat format; |
| 869 | union { | 881 | TileMode tile_mode; |
| 870 | BitField<0, 4, u32> block_width; | ||
| 871 | BitField<4, 4, u32> block_height; | ||
| 872 | BitField<8, 4, u32> block_depth; | ||
| 873 | BitField<20, 1, InvMemoryLayout> type; | ||
| 874 | } memory_layout; | ||
| 875 | u32 layer_stride; | 882 | u32 layer_stride; |
| 876 | 883 | ||
| 877 | GPUVAddr Address() const { | 884 | GPUVAddr Address() const { |
| @@ -880,7 +887,18 @@ public: | |||
| 880 | } | 887 | } |
| 881 | } zeta; | 888 | } zeta; |
| 882 | 889 | ||
| 883 | INSERT_UNION_PADDING_WORDS(0x41); | 890 | struct { |
| 891 | union { | ||
| 892 | BitField<0, 16, u32> x; | ||
| 893 | BitField<16, 16, u32> width; | ||
| 894 | }; | ||
| 895 | union { | ||
| 896 | BitField<0, 16, u32> y; | ||
| 897 | BitField<16, 16, u32> height; | ||
| 898 | }; | ||
| 899 | } render_area; | ||
| 900 | |||
| 901 | INSERT_UNION_PADDING_WORDS(0x3F); | ||
| 884 | 902 | ||
| 885 | union { | 903 | union { |
| 886 | BitField<0, 4, u32> stencil; | 904 | BitField<0, 4, u32> stencil; |
| @@ -921,7 +939,7 @@ public: | |||
| 921 | BitField<25, 3, u32> map_7; | 939 | BitField<25, 3, u32> map_7; |
| 922 | }; | 940 | }; |
| 923 | 941 | ||
| 924 | u32 GetMap(std::size_t index) const { | 942 | u32 Map(std::size_t index) const { |
| 925 | const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3, | 943 | const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3, |
| 926 | map_4, map_5, map_6, map_7}; | 944 | map_4, map_5, map_6, map_7}; |
| 927 | ASSERT(index < maps.size()); | 945 | ASSERT(index < maps.size()); |
| @@ -934,11 +952,13 @@ public: | |||
| 934 | u32 zeta_width; | 952 | u32 zeta_width; |
| 935 | u32 zeta_height; | 953 | u32 zeta_height; |
| 936 | union { | 954 | union { |
| 937 | BitField<0, 16, u32> zeta_layers; | 955 | BitField<0, 16, u32> zeta_depth; |
| 938 | BitField<16, 1, u32> zeta_volume; | 956 | BitField<16, 1, u32> zeta_volume; |
| 939 | }; | 957 | }; |
| 940 | 958 | ||
| 941 | INSERT_UNION_PADDING_WORDS(0x26); | 959 | SamplerIndex sampler_index; |
| 960 | |||
| 961 | INSERT_UNION_PADDING_WORDS(0x25); | ||
| 942 | 962 | ||
| 943 | u32 depth_test_enable; | 963 | u32 depth_test_enable; |
| 944 | 964 | ||
| @@ -964,6 +984,7 @@ public: | |||
| 964 | float b; | 984 | float b; |
| 965 | float a; | 985 | float a; |
| 966 | } blend_color; | 986 | } blend_color; |
| 987 | |||
| 967 | INSERT_UNION_PADDING_WORDS(0x4); | 988 | INSERT_UNION_PADDING_WORDS(0x4); |
| 968 | 989 | ||
| 969 | struct { | 990 | struct { |
| @@ -1001,7 +1022,12 @@ public: | |||
| 1001 | float line_width_smooth; | 1022 | float line_width_smooth; |
| 1002 | float line_width_aliased; | 1023 | float line_width_aliased; |
| 1003 | 1024 | ||
| 1004 | INSERT_UNION_PADDING_WORDS(0x1F); | 1025 | INSERT_UNION_PADDING_WORDS(0x1B); |
| 1026 | |||
| 1027 | u32 invalidate_sampler_cache_no_wfi; | ||
| 1028 | u32 invalidate_texture_header_cache_no_wfi; | ||
| 1029 | |||
| 1030 | INSERT_UNION_PADDING_WORDS(0x2); | ||
| 1005 | 1031 | ||
| 1006 | u32 vb_element_base; | 1032 | u32 vb_element_base; |
| 1007 | u32 vb_base_instance; | 1033 | u32 vb_base_instance; |
| @@ -1045,13 +1071,13 @@ public: | |||
| 1045 | } condition; | 1071 | } condition; |
| 1046 | 1072 | ||
| 1047 | struct { | 1073 | struct { |
| 1048 | u32 tsc_address_high; | 1074 | u32 address_high; |
| 1049 | u32 tsc_address_low; | 1075 | u32 address_low; |
| 1050 | u32 tsc_limit; | 1076 | u32 limit; |
| 1051 | 1077 | ||
| 1052 | GPUVAddr TSCAddress() const { | 1078 | GPUVAddr Address() const { |
| 1053 | return static_cast<GPUVAddr>( | 1079 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | |
| 1054 | (static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low); | 1080 | address_low); |
| 1055 | } | 1081 | } |
| 1056 | } tsc; | 1082 | } tsc; |
| 1057 | 1083 | ||
| @@ -1062,13 +1088,13 @@ public: | |||
| 1062 | u32 line_smooth_enable; | 1088 | u32 line_smooth_enable; |
| 1063 | 1089 | ||
| 1064 | struct { | 1090 | struct { |
| 1065 | u32 tic_address_high; | 1091 | u32 address_high; |
| 1066 | u32 tic_address_low; | 1092 | u32 address_low; |
| 1067 | u32 tic_limit; | 1093 | u32 limit; |
| 1068 | 1094 | ||
| 1069 | GPUVAddr TICAddress() const { | 1095 | GPUVAddr Address() const { |
| 1070 | return static_cast<GPUVAddr>( | 1096 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | |
| 1071 | (static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low); | 1097 | address_low); |
| 1072 | } | 1098 | } |
| 1073 | } tic; | 1099 | } tic; |
| 1074 | 1100 | ||
| @@ -1397,12 +1423,6 @@ public: | |||
| 1397 | 1423 | ||
| 1398 | void FlushMMEInlineDraw(); | 1424 | void FlushMMEInlineDraw(); |
| 1399 | 1425 | ||
| 1400 | /// Given a texture handle, returns the TSC and TIC entries. | ||
| 1401 | Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; | ||
| 1402 | |||
| 1403 | /// Returns the texture information for a specific texture in a specific shader stage. | ||
| 1404 | Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const; | ||
| 1405 | |||
| 1406 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; | 1426 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; |
| 1407 | 1427 | ||
| 1408 | SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; | 1428 | SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; |
| @@ -1598,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370); | |||
| 1598 | ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); | 1618 | ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); |
| 1599 | ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); | 1619 | ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); |
| 1600 | ASSERT_REG_POSITION(patch_vertices, 0x373); | 1620 | ASSERT_REG_POSITION(patch_vertices, 0x373); |
| 1621 | ASSERT_REG_POSITION(fragment_barrier, 0x378); | ||
| 1601 | ASSERT_REG_POSITION(scissor_test, 0x380); | 1622 | ASSERT_REG_POSITION(scissor_test, 0x380); |
| 1602 | ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); | 1623 | ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); |
| 1603 | ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); | 1624 | ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); |
| 1604 | ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); | 1625 | ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); |
| 1626 | ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD); | ||
| 1627 | ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF); | ||
| 1605 | ASSERT_REG_POSITION(color_mask_common, 0x3E4); | 1628 | ASSERT_REG_POSITION(color_mask_common, 0x3E4); |
| 1606 | ASSERT_REG_POSITION(depth_bounds, 0x3E7); | 1629 | ASSERT_REG_POSITION(depth_bounds, 0x3E7); |
| 1607 | ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); | 1630 | ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); |
| @@ -1609,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED); | |||
| 1609 | ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); | 1632 | ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); |
| 1610 | ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); | 1633 | ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); |
| 1611 | ASSERT_REG_POSITION(zeta, 0x3F8); | 1634 | ASSERT_REG_POSITION(zeta, 0x3F8); |
| 1635 | ASSERT_REG_POSITION(render_area, 0x3FD); | ||
| 1612 | ASSERT_REG_POSITION(clear_flags, 0x43E); | 1636 | ASSERT_REG_POSITION(clear_flags, 0x43E); |
| 1613 | ASSERT_REG_POSITION(fill_rectangle, 0x44F); | 1637 | ASSERT_REG_POSITION(fill_rectangle, 0x44F); |
| 1614 | ASSERT_REG_POSITION(vertex_attrib_format, 0x458); | 1638 | ASSERT_REG_POSITION(vertex_attrib_format, 0x458); |
| @@ -1617,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); | |||
| 1617 | ASSERT_REG_POSITION(rt_control, 0x487); | 1641 | ASSERT_REG_POSITION(rt_control, 0x487); |
| 1618 | ASSERT_REG_POSITION(zeta_width, 0x48a); | 1642 | ASSERT_REG_POSITION(zeta_width, 0x48a); |
| 1619 | ASSERT_REG_POSITION(zeta_height, 0x48b); | 1643 | ASSERT_REG_POSITION(zeta_height, 0x48b); |
| 1620 | ASSERT_REG_POSITION(zeta_layers, 0x48c); | 1644 | ASSERT_REG_POSITION(zeta_depth, 0x48c); |
| 1645 | ASSERT_REG_POSITION(sampler_index, 0x48D); | ||
| 1621 | ASSERT_REG_POSITION(depth_test_enable, 0x4B3); | 1646 | ASSERT_REG_POSITION(depth_test_enable, 0x4B3); |
| 1622 | ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); | 1647 | ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); |
| 1623 | ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); | 1648 | ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); |
| @@ -1641,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA); | |||
| 1641 | ASSERT_REG_POSITION(screen_y_control, 0x4EB); | 1666 | ASSERT_REG_POSITION(screen_y_control, 0x4EB); |
| 1642 | ASSERT_REG_POSITION(line_width_smooth, 0x4EC); | 1667 | ASSERT_REG_POSITION(line_width_smooth, 0x4EC); |
| 1643 | ASSERT_REG_POSITION(line_width_aliased, 0x4ED); | 1668 | ASSERT_REG_POSITION(line_width_aliased, 0x4ED); |
| 1669 | ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509); | ||
| 1670 | ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A); | ||
| 1644 | ASSERT_REG_POSITION(vb_element_base, 0x50D); | 1671 | ASSERT_REG_POSITION(vb_element_base, 0x50D); |
| 1645 | ASSERT_REG_POSITION(vb_base_instance, 0x50E); | 1672 | ASSERT_REG_POSITION(vb_base_instance, 0x50E); |
| 1646 | ASSERT_REG_POSITION(clip_distance_enabled, 0x544); | 1673 | ASSERT_REG_POSITION(clip_distance_enabled, 0x544); |
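The TileMode struct introduced above replaces the two per-target memory_layout unions along with the removed MemoryLayout and InvMemoryLayout enums. Decoding follows the usual Maxwell block-linear convention, where block_width/block_height/block_depth store log2 of the block dimension measured in GOBs; the helpers below are a hedged sketch (names hypothetical, scope qualifiers elided):

// Assumed semantics: each field is a log2 GOB count.
u32 GobsPerBlockHeight(const TileMode mode) {
    return 1U << mode.block_height.Value(); // e.g. 4 -> block is 16 GOBs tall
}
bool IsPitchLinear(const TileMode mode) {
    return mode.is_pitch_linear.Value() != 0; // replaces InvMemoryLayout::Linear
}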
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 1c29e895e..ba750748c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -96,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() { | |||
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | void MaxwellDMA::CopyBlockLinearToPitch() { | 98 | void MaxwellDMA::CopyBlockLinearToPitch() { |
| 99 | UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); | ||
| 99 | UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); | 100 | UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); |
| 100 | UNIMPLEMENTED_IF(regs.src_params.layer != 0); | 101 | UNIMPLEMENTED_IF(regs.src_params.layer != 0); |
| 101 | 102 | ||
| @@ -135,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 135 | } | 136 | } |
| 136 | 137 | ||
| 137 | void MaxwellDMA::CopyPitchToBlockLinear() { | 138 | void MaxwellDMA::CopyPitchToBlockLinear() { |
| 139 | UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); | ||
| 140 | |||
| 138 | const auto& dst_params = regs.dst_params; | 141 | const auto& dst_params = regs.dst_params; |
| 139 | const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; | 142 | const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; |
| 140 | const u32 width = dst_params.width; | 143 | const u32 width = dst_params.width; |
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index c5f26896e..3512283ff 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "core/core.h" | 11 | #include "core/core.h" |
| 12 | #include "video_core/delayed_destruction_ring.h" | ||
| 12 | #include "video_core/gpu.h" | 13 | #include "video_core/gpu.h" |
| 13 | #include "video_core/memory_manager.h" | 14 | #include "video_core/memory_manager.h" |
| 14 | #include "video_core/rasterizer_interface.h" | 15 | #include "video_core/rasterizer_interface.h" |
| @@ -47,6 +48,11 @@ protected: | |||
| 47 | template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache> | 48 | template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache> |
| 48 | class FenceManager { | 49 | class FenceManager { |
| 49 | public: | 50 | public: |
| 51 | /// Notify the fence manager about a new frame | ||
| 52 | void TickFrame() { | ||
| 53 | delayed_destruction_ring.Tick(); | ||
| 54 | } | ||
| 55 | |||
| 50 | void SignalSemaphore(GPUVAddr addr, u32 value) { | 56 | void SignalSemaphore(GPUVAddr addr, u32 value) { |
| 51 | TryReleasePendingFences(); | 57 | TryReleasePendingFences(); |
| 52 | const bool should_flush = ShouldFlush(); | 58 | const bool should_flush = ShouldFlush(); |
| @@ -86,7 +92,7 @@ public: | |||
| 86 | } else { | 92 | } else { |
| 87 | gpu.IncrementSyncPoint(current_fence->GetPayload()); | 93 | gpu.IncrementSyncPoint(current_fence->GetPayload()); |
| 88 | } | 94 | } |
| 89 | fences.pop(); | 95 | PopFence(); |
| 90 | } | 96 | } |
| 91 | } | 97 | } |
| 92 | 98 | ||
| @@ -132,7 +138,7 @@ private: | |||
| 132 | } else { | 138 | } else { |
| 133 | gpu.IncrementSyncPoint(current_fence->GetPayload()); | 139 | gpu.IncrementSyncPoint(current_fence->GetPayload()); |
| 134 | } | 140 | } |
| 135 | fences.pop(); | 141 | PopFence(); |
| 136 | } | 142 | } |
| 137 | } | 143 | } |
| 138 | 144 | ||
| @@ -158,7 +164,14 @@ private: | |||
| 158 | query_cache.CommitAsyncFlushes(); | 164 | query_cache.CommitAsyncFlushes(); |
| 159 | } | 165 | } |
| 160 | 166 | ||
| 167 | void PopFence() { | ||
| 168 | delayed_destruction_ring.Push(std::move(fences.front())); | ||
| 169 | fences.pop(); | ||
| 170 | } | ||
| 171 | |||
| 161 | std::queue<TFence> fences; | 172 | std::queue<TFence> fences; |
| 173 | |||
| 174 | DelayedDestructionRing<TFence, 6> delayed_destruction_ring; | ||
| 162 | }; | 175 | }; |
| 163 | 176 | ||
| 164 | } // namespace VideoCommon | 177 | } // namespace VideoCommon |
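With the fence_manager.h change above, popped fences are no longer destroyed immediately: PopFence() parks them in a DelayedDestructionRing and TickFrame() advances it once per frame, so a fence the GPU may still reference survives several frames before deletion. The ring's header is not part of this diff; below is a minimal sketch matching the Tick()/Push() interface used here, not necessarily the actual implementation:

#include <array>
#include <cstddef>
#include <utility>
#include <vector>

// Objects pushed during a frame are destroyed TICKS_TO_DESTROY ticks later,
// when their slot comes around again and is cleared.
template <typename T, std::size_t TICKS_TO_DESTROY>
class DelayedDestructionRing {
public:
    void Tick() {
        index = (index + 1) % TICKS_TO_DESTROY;
        elements[index].clear();
    }
    void Push(T&& object) {
        elements[index].push_back(std::move(object));
    }

private:
    std::size_t index = 0;
    std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
};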
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h new file mode 100644 index 000000000..b86c3a757 --- /dev/null +++ b/src/video_core/framebuffer_config.h | |||
| @@ -0,0 +1,31 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | namespace Tegra { | ||
| 8 | |||
| 9 | /** | ||
| 10 | * Struct describing framebuffer configuration | ||
| 11 | */ | ||
| 12 | struct FramebufferConfig { | ||
| 13 | enum class PixelFormat : u32 { | ||
| 14 | A8B8G8R8_UNORM = 1, | ||
| 15 | RGB565_UNORM = 4, | ||
| 16 | B8G8R8A8_UNORM = 5, | ||
| 17 | }; | ||
| 18 | |||
| 19 | VAddr address{}; | ||
| 20 | u32 offset{}; | ||
| 21 | u32 width{}; | ||
| 22 | u32 height{}; | ||
| 23 | u32 stride{}; | ||
| 24 | PixelFormat pixel_format{}; | ||
| 25 | |||
| 26 | using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; | ||
| 27 | TransformFlags transform_flags{}; | ||
| 28 | Common::Rectangle<int> crop_rect; | ||
| 29 | }; | ||
| 30 | |||
| 31 | } // namespace Tegra | ||
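As displayed, the new framebuffer_config.h uses VAddr, u32, Common::Rectangle, and the NVFlinger BufferTransformFlags without including their headers, so it compiles only where those declarations are already visible to the includer. A self-contained version would open roughly as below; the paths are inferred from includes appearing elsewhere in this diff and should be treated as assumptions:

#pragma once

#include "common/common_types.h"                     // VAddr, u32
#include "common/math_util.h"                        // Common::Rectangle (assumed path)
#include "core/hle/service/nvflinger/buffer_queue.h" // BufferTransformFlags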
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index e2512a7f2..6ab06775f 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "core/core_timing.h" | 10 | #include "core/core_timing.h" |
| 11 | #include "core/core_timing_util.h" | 11 | #include "core/core_timing_util.h" |
| 12 | #include "core/frontend/emu_window.h" | 12 | #include "core/frontend/emu_window.h" |
| 13 | #include "core/hardware_interrupt_manager.h" | ||
| 13 | #include "core/memory.h" | 14 | #include "core/memory.h" |
| 14 | #include "core/settings.h" | 15 | #include "core/settings.h" |
| 15 | #include "video_core/engines/fermi_2d.h" | 16 | #include "video_core/engines/fermi_2d.h" |
| @@ -36,7 +37,8 @@ GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) | |||
| 36 | kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, | 37 | kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, |
| 37 | maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, | 38 | maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, |
| 38 | kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, | 39 | kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, |
| 39 | shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {} | 40 | shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, |
| 41 | gpu_thread{system_, is_async_} {} | ||
| 40 | 42 | ||
| 41 | GPU::~GPU() = default; | 43 | GPU::~GPU() = default; |
| 42 | 44 | ||
| @@ -198,10 +200,6 @@ void GPU::SyncGuestHost() { | |||
| 198 | renderer->Rasterizer().SyncGuestHost(); | 200 | renderer->Rasterizer().SyncGuestHost(); |
| 199 | } | 201 | } |
| 200 | 202 | ||
| 201 | void GPU::OnCommandListEnd() { | ||
| 202 | renderer->Rasterizer().ReleaseFences(); | ||
| 203 | } | ||
| 204 | |||
| 205 | enum class GpuSemaphoreOperation { | 203 | enum class GpuSemaphoreOperation { |
| 206 | AcquireEqual = 0x1, | 204 | AcquireEqual = 0x1, |
| 207 | WriteLong = 0x2, | 205 | WriteLong = 0x2, |
| @@ -461,4 +459,75 @@ void GPU::ProcessSemaphoreAcquire() { | |||
| 461 | } | 459 | } |
| 462 | } | 460 | } |
| 463 | 461 | ||
| 462 | void GPU::Start() { | ||
| 463 | gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher); | ||
| 464 | cpu_context = renderer->GetRenderWindow().CreateSharedContext(); | ||
| 465 | cpu_context->MakeCurrent(); | ||
| 466 | } | ||
| 467 | |||
| 468 | void GPU::ObtainContext() { | ||
| 469 | cpu_context->MakeCurrent(); | ||
| 470 | } | ||
| 471 | |||
| 472 | void GPU::ReleaseContext() { | ||
| 473 | cpu_context->DoneCurrent(); | ||
| 474 | } | ||
| 475 | |||
| 476 | void GPU::PushGPUEntries(Tegra::CommandList&& entries) { | ||
| 477 | gpu_thread.SubmitList(std::move(entries)); | ||
| 478 | } | ||
| 479 | |||
| 480 | void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { | ||
| 481 | if (!use_nvdec) { | ||
| 482 | return; | ||
| 483 | } | ||
| 484 | // This condition fires when a video stream ends; clear all intermediary data | ||
| 485 | if (entries[0].raw == 0xDEADB33F) { | ||
| 486 | cdma_pusher.reset(); | ||
| 487 | return; | ||
| 488 | } | ||
| 489 | if (!cdma_pusher) { | ||
| 490 | cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this); | ||
| 491 | } | ||
| 492 | |||
| 493 | // SubmitCommandBuffer would make the nvdec operations async; this is not currently working | ||
| 494 | // TODO(ameerj): RE proper async nvdec operation | ||
| 495 | // gpu_thread.SubmitCommandBuffer(std::move(entries)); | ||
| 496 | |||
| 497 | cdma_pusher->Push(std::move(entries)); | ||
| 498 | cdma_pusher->DispatchCalls(); | ||
| 499 | } | ||
| 500 | |||
| 501 | void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | ||
| 502 | gpu_thread.SwapBuffers(framebuffer); | ||
| 503 | } | ||
| 504 | |||
| 505 | void GPU::FlushRegion(VAddr addr, u64 size) { | ||
| 506 | gpu_thread.FlushRegion(addr, size); | ||
| 507 | } | ||
| 508 | |||
| 509 | void GPU::InvalidateRegion(VAddr addr, u64 size) { | ||
| 510 | gpu_thread.InvalidateRegion(addr, size); | ||
| 511 | } | ||
| 512 | |||
| 513 | void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||
| 514 | gpu_thread.FlushAndInvalidateRegion(addr, size); | ||
| 515 | } | ||
| 516 | |||
| 517 | void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { | ||
| 518 | auto& interrupt_manager = system.InterruptManager(); | ||
| 519 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | ||
| 520 | } | ||
| 521 | |||
| 522 | void GPU::WaitIdle() const { | ||
| 523 | gpu_thread.WaitIdle(); | ||
| 524 | } | ||
| 525 | |||
| 526 | void GPU::OnCommandListEnd() { | ||
| 527 | if (is_async) { | ||
| 528 | // This command only applies to asynchronous GPU mode | ||
| 529 | gpu_thread.OnCommandListEnd(); | ||
| 530 | } | ||
| 531 | } | ||
| 532 | |||
| 464 | } // namespace Tegra | 533 | } // namespace Tegra |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 660641d04..d81e38680 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -15,6 +15,8 @@ | |||
| 15 | #include "core/hle/service/nvflinger/buffer_queue.h" | 15 | #include "core/hle/service/nvflinger/buffer_queue.h" |
| 16 | #include "video_core/cdma_pusher.h" | 16 | #include "video_core/cdma_pusher.h" |
| 17 | #include "video_core/dma_pusher.h" | 17 | #include "video_core/dma_pusher.h" |
| 18 | #include "video_core/framebuffer_config.h" | ||
| 19 | #include "video_core/gpu_thread.h" | ||
| 18 | 20 | ||
| 19 | using CacheAddr = std::uintptr_t; | 21 | using CacheAddr = std::uintptr_t; |
| 20 | [[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { | 22 | [[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { |
| @@ -101,28 +103,6 @@ enum class DepthFormat : u32 { | |||
| 101 | struct CommandListHeader; | 103 | struct CommandListHeader; |
| 102 | class DebugContext; | 104 | class DebugContext; |
| 103 | 105 | ||
| 104 | /** | ||
| 105 | * Struct describing framebuffer configuration | ||
| 106 | */ | ||
| 107 | struct FramebufferConfig { | ||
| 108 | enum class PixelFormat : u32 { | ||
| 109 | A8B8G8R8_UNORM = 1, | ||
| 110 | RGB565_UNORM = 4, | ||
| 111 | B8G8R8A8_UNORM = 5, | ||
| 112 | }; | ||
| 113 | |||
| 114 | VAddr address; | ||
| 115 | u32 offset; | ||
| 116 | u32 width; | ||
| 117 | u32 height; | ||
| 118 | u32 stride; | ||
| 119 | PixelFormat pixel_format; | ||
| 120 | |||
| 121 | using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; | ||
| 122 | TransformFlags transform_flags; | ||
| 123 | Common::Rectangle<int> crop_rect; | ||
| 124 | }; | ||
| 125 | |||
| 126 | namespace Engines { | 106 | namespace Engines { |
| 127 | class Fermi2D; | 107 | class Fermi2D; |
| 128 | class Maxwell3D; | 108 | class Maxwell3D; |
| @@ -141,7 +121,7 @@ enum class EngineID { | |||
| 141 | 121 | ||
| 142 | class MemoryManager; | 122 | class MemoryManager; |
| 143 | 123 | ||
| 144 | class GPU { | 124 | class GPU final { |
| 145 | public: | 125 | public: |
| 146 | struct MethodCall { | 126 | struct MethodCall { |
| 147 | u32 method{}; | 127 | u32 method{}; |
| @@ -159,7 +139,7 @@ public: | |||
| 159 | }; | 139 | }; |
| 160 | 140 | ||
| 161 | explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_); | 141 | explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_); |
| 162 | virtual ~GPU(); | 142 | ~GPU(); |
| 163 | 143 | ||
| 164 | /// Binds a renderer to the GPU. | 144 | /// Binds a renderer to the GPU. |
| 165 | void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer); | 145 | void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer); |
| @@ -176,7 +156,7 @@ public: | |||
| 176 | /// Synchronizes CPU writes with Host GPU memory. | 156 | /// Synchronizes CPU writes with Host GPU memory. |
| 177 | void SyncGuestHost(); | 157 | void SyncGuestHost(); |
| 178 | /// Signal the ending of command list. | 158 | /// Signal the ending of command list. |
| 179 | virtual void OnCommandListEnd(); | 159 | void OnCommandListEnd(); |
| 180 | 160 | ||
| 181 | /// Request a host GPU memory flush from the CPU. | 161 | /// Request a host GPU memory flush from the CPU. |
| 182 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); | 162 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); |
| @@ -240,7 +220,7 @@ public: | |||
| 240 | } | 220 | } |
| 241 | 221 | ||
| 242 | // Waits for the GPU to finish working | 222 | // Waits for the GPU to finish working |
| 243 | virtual void WaitIdle() const = 0; | 223 | void WaitIdle() const; |
| 244 | 224 | ||
| 245 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | 225 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. |
| 246 | void WaitFence(u32 syncpoint_id, u32 value); | 226 | void WaitFence(u32 syncpoint_id, u32 value); |
| @@ -330,34 +310,34 @@ public: | |||
| 330 | /// Performs any additional setup necessary in order to begin GPU emulation. | 310 | /// Performs any additional setup necessary in order to begin GPU emulation. |
| 331 | /// This can be used to launch any necessary threads and register any necessary | 311 | /// This can be used to launch any necessary threads and register any necessary |
| 332 | /// core timing events. | 312 | /// core timing events. |
| 333 | virtual void Start() = 0; | 313 | void Start(); |
| 334 | 314 | ||
| 335 | /// Obtain the CPU Context | 315 | /// Obtain the CPU Context |
| 336 | virtual void ObtainContext() = 0; | 316 | void ObtainContext(); |
| 337 | 317 | ||
| 338 | /// Release the CPU Context | 318 | /// Release the CPU Context |
| 339 | virtual void ReleaseContext() = 0; | 319 | void ReleaseContext(); |
| 340 | 320 | ||
| 341 | /// Push GPU command entries to be processed | 321 | /// Push GPU command entries to be processed |
| 342 | virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; | 322 | void PushGPUEntries(Tegra::CommandList&& entries); |
| 343 | 323 | ||
| 344 | /// Push GPU command buffer entries to be processed | 324 | /// Push GPU command buffer entries to be processed |
| 345 | virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0; | 325 | void PushCommandBuffer(Tegra::ChCommandHeaderList& entries); |
| 346 | 326 | ||
| 347 | /// Swap buffers (render frame) | 327 | /// Swap buffers (render frame) |
| 348 | virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; | 328 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); |
| 349 | 329 | ||
| 350 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 330 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 351 | virtual void FlushRegion(VAddr addr, u64 size) = 0; | 331 | void FlushRegion(VAddr addr, u64 size); |
| 352 | 332 | ||
| 353 | /// Notify rasterizer that any caches of the specified region should be invalidated | 333 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 354 | virtual void InvalidateRegion(VAddr addr, u64 size) = 0; | 334 | void InvalidateRegion(VAddr addr, u64 size); |
| 355 | 335 | ||
| 356 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 336 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 357 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; | 337 | void FlushAndInvalidateRegion(VAddr addr, u64 size); |
| 358 | 338 | ||
| 359 | protected: | 339 | protected: |
| 360 | virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; | 340 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const; |
| 361 | 341 | ||
| 362 | private: | 342 | private: |
| 363 | void ProcessBindMethod(const MethodCall& method_call); | 343 | void ProcessBindMethod(const MethodCall& method_call); |
| @@ -427,6 +407,9 @@ private: | |||
| 427 | std::mutex flush_request_mutex; | 407 | std::mutex flush_request_mutex; |
| 428 | 408 | ||
| 429 | const bool is_async; | 409 | const bool is_async; |
| 410 | |||
| 411 | VideoCommon::GPUThread::ThreadManager gpu_thread; | ||
| 412 | std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; | ||
| 430 | }; | 413 | }; |
| 431 | 414 | ||
| 432 | #define ASSERT_REG_POSITION(field_name, position) \ | 415 | #define ASSERT_REG_POSITION(field_name, position) \ |
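Taken together, the gpu.cpp and gpu.h hunks above fold the GPUAsynch and GPUSynch subclasses (deleted below) into GPU itself: the class becomes final, the former pure-virtual entry points become plain member functions, and the sync/async split moves behind the ThreadManager member, which now receives is_async at construction. A hedged sketch of the dispatch pattern this enables; apart from SubmitList, the names below are assumptions rather than the actual gpu_thread code:

// Sketch: a ThreadManager built with is_async == false can run submissions
// inline on the calling thread, reproducing the old GPUSynch behavior, while
// the async path queues them for the GPU thread as before.
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
    if (!is_async) {
        dma_pusher.Push(std::move(entries)); // synchronous path
        dma_pusher.DispatchCalls();
        return;
    }
    PushCommand(SubmitListCommand(std::move(entries))); // asynchronous path
}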
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp deleted file mode 100644 index 6cc091ecd..000000000 --- a/src/video_core/gpu_asynch.cpp +++ /dev/null | |||
| @@ -1,86 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "core/core.h" | ||
| 6 | #include "core/hardware_interrupt_manager.h" | ||
| 7 | #include "video_core/gpu_asynch.h" | ||
| 8 | #include "video_core/gpu_thread.h" | ||
| 9 | #include "video_core/renderer_base.h" | ||
| 10 | |||
| 11 | namespace VideoCommon { | ||
| 12 | |||
| 13 | GPUAsynch::GPUAsynch(Core::System& system_, bool use_nvdec_) | ||
| 14 | : GPU{system_, true, use_nvdec_}, gpu_thread{system_} {} | ||
| 15 | |||
| 16 | GPUAsynch::~GPUAsynch() = default; | ||
| 17 | |||
| 18 | void GPUAsynch::Start() { | ||
| 19 | gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher); | ||
| 20 | cpu_context = renderer->GetRenderWindow().CreateSharedContext(); | ||
| 21 | cpu_context->MakeCurrent(); | ||
| 22 | } | ||
| 23 | |||
| 24 | void GPUAsynch::ObtainContext() { | ||
| 25 | cpu_context->MakeCurrent(); | ||
| 26 | } | ||
| 27 | |||
| 28 | void GPUAsynch::ReleaseContext() { | ||
| 29 | cpu_context->DoneCurrent(); | ||
| 30 | } | ||
| 31 | |||
| 32 | void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { | ||
| 33 | gpu_thread.SubmitList(std::move(entries)); | ||
| 34 | } | ||
| 35 | |||
| 36 | void GPUAsynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { | ||
| 37 | if (!use_nvdec) { | ||
| 38 | return; | ||
| 39 | } | ||
| 40 | // This condition fires when a video stream ends, clear all intermediary data | ||
| 41 | if (entries[0].raw == 0xDEADB33F) { | ||
| 42 | cdma_pusher.reset(); | ||
| 43 | return; | ||
| 44 | } | ||
| 45 | if (!cdma_pusher) { | ||
| 46 | cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this); | ||
| 47 | } | ||
| 48 | |||
| 49 | // SubmitCommandBuffer would make the nvdec operations async, this is not currently working | ||
| 50 | // TODO(ameerj): RE proper async nvdec operation | ||
| 51 | // gpu_thread.SubmitCommandBuffer(std::move(entries)); | ||
| 52 | |||
| 53 | cdma_pusher->Push(std::move(entries)); | ||
| 54 | cdma_pusher->DispatchCalls(); | ||
| 55 | } | ||
| 56 | |||
| 57 | void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | ||
| 58 | gpu_thread.SwapBuffers(framebuffer); | ||
| 59 | } | ||
| 60 | |||
| 61 | void GPUAsynch::FlushRegion(VAddr addr, u64 size) { | ||
| 62 | gpu_thread.FlushRegion(addr, size); | ||
| 63 | } | ||
| 64 | |||
| 65 | void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { | ||
| 66 | gpu_thread.InvalidateRegion(addr, size); | ||
| 67 | } | ||
| 68 | |||
| 69 | void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||
| 70 | gpu_thread.FlushAndInvalidateRegion(addr, size); | ||
| 71 | } | ||
| 72 | |||
| 73 | void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { | ||
| 74 | auto& interrupt_manager = system.InterruptManager(); | ||
| 75 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | ||
| 76 | } | ||
| 77 | |||
| 78 | void GPUAsynch::WaitIdle() const { | ||
| 79 | gpu_thread.WaitIdle(); | ||
| 80 | } | ||
| 81 | |||
| 82 | void GPUAsynch::OnCommandListEnd() { | ||
| 83 | gpu_thread.OnCommandListEnd(); | ||
| 84 | } | ||
| 85 | |||
| 86 | } // namespace VideoCommon | ||
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h deleted file mode 100644 index a384113f4..000000000 --- a/src/video_core/gpu_asynch.h +++ /dev/null | |||
| @@ -1,47 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/gpu.h" | ||
| 8 | #include "video_core/gpu_thread.h" | ||
| 9 | |||
| 10 | namespace Core::Frontend { | ||
| 11 | class GraphicsContext; | ||
| 12 | } | ||
| 13 | |||
| 14 | namespace VideoCore { | ||
| 15 | class RendererBase; | ||
| 16 | } // namespace VideoCore | ||
| 17 | |||
| 18 | namespace VideoCommon { | ||
| 19 | |||
| 20 | /// Implementation of GPU interface that runs the GPU asynchronously | ||
| 21 | class GPUAsynch final : public Tegra::GPU { | ||
| 22 | public: | ||
| 23 | explicit GPUAsynch(Core::System& system_, bool use_nvdec_); | ||
| 24 | ~GPUAsynch() override; | ||
| 25 | |||
| 26 | void Start() override; | ||
| 27 | void ObtainContext() override; | ||
| 28 | void ReleaseContext() override; | ||
| 29 | void PushGPUEntries(Tegra::CommandList&& entries) override; | ||
| 30 | void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override; | ||
| 31 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | ||
| 32 | void FlushRegion(VAddr addr, u64 size) override; | ||
| 33 | void InvalidateRegion(VAddr addr, u64 size) override; | ||
| 34 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | ||
| 35 | void WaitIdle() const override; | ||
| 36 | |||
| 37 | void OnCommandListEnd() override; | ||
| 38 | |||
| 39 | protected: | ||
| 40 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; | ||
| 41 | |||
| 42 | private: | ||
| 43 | GPUThread::ThreadManager gpu_thread; | ||
| 44 | std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; | ||
| 45 | }; | ||
| 46 | |||
| 47 | } // namespace VideoCommon | ||
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp deleted file mode 100644 index 1e9d4b9b2..000000000 --- a/src/video_core/gpu_synch.cpp +++ /dev/null | |||
| @@ -1,61 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/gpu_synch.h" | ||
| 6 | #include "video_core/renderer_base.h" | ||
| 7 | |||
| 8 | namespace VideoCommon { | ||
| 9 | |||
| 10 | GPUSynch::GPUSynch(Core::System& system_, bool use_nvdec_) : GPU{system_, false, use_nvdec_} {} | ||
| 11 | |||
| 12 | GPUSynch::~GPUSynch() = default; | ||
| 13 | |||
| 14 | void GPUSynch::Start() {} | ||
| 15 | |||
| 16 | void GPUSynch::ObtainContext() { | ||
| 17 | renderer->Context().MakeCurrent(); | ||
| 18 | } | ||
| 19 | |||
| 20 | void GPUSynch::ReleaseContext() { | ||
| 21 | renderer->Context().DoneCurrent(); | ||
| 22 | } | ||
| 23 | |||
| 24 | void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { | ||
| 25 | dma_pusher->Push(std::move(entries)); | ||
| 26 | dma_pusher->DispatchCalls(); | ||
| 27 | } | ||
| 28 | |||
| 29 | void GPUSynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { | ||
| 30 | if (!use_nvdec) { | ||
| 31 | return; | ||
| 32 | } | ||
| 33 | // This condition fires when a video stream ends, clears all intermediary data | ||
| 34 | if (entries[0].raw == 0xDEADB33F) { | ||
| 35 | cdma_pusher.reset(); | ||
| 36 | return; | ||
| 37 | } | ||
| 38 | if (!cdma_pusher) { | ||
| 39 | cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this); | ||
| 40 | } | ||
| 41 | cdma_pusher->Push(std::move(entries)); | ||
| 42 | cdma_pusher->DispatchCalls(); | ||
| 43 | } | ||
| 44 | |||
| 45 | void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | ||
| 46 | renderer->SwapBuffers(framebuffer); | ||
| 47 | } | ||
| 48 | |||
| 49 | void GPUSynch::FlushRegion(VAddr addr, u64 size) { | ||
| 50 | renderer->Rasterizer().FlushRegion(addr, size); | ||
| 51 | } | ||
| 52 | |||
| 53 | void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { | ||
| 54 | renderer->Rasterizer().InvalidateRegion(addr, size); | ||
| 55 | } | ||
| 56 | |||
| 57 | void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||
| 58 | renderer->Rasterizer().FlushAndInvalidateRegion(addr, size); | ||
| 59 | } | ||
| 60 | |||
| 61 | } // namespace VideoCommon | ||
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h deleted file mode 100644 index c5904b8db..000000000 --- a/src/video_core/gpu_synch.h +++ /dev/null | |||
| @@ -1,41 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/gpu.h" | ||
| 8 | |||
| 9 | namespace Core::Frontend { | ||
| 10 | class GraphicsContext; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace VideoCore { | ||
| 14 | class RendererBase; | ||
| 15 | } // namespace VideoCore | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | /// Implementation of GPU interface that runs the GPU synchronously | ||
| 20 | class GPUSynch final : public Tegra::GPU { | ||
| 21 | public: | ||
| 22 | explicit GPUSynch(Core::System& system_, bool use_nvdec_); | ||
| 23 | ~GPUSynch() override; | ||
| 24 | |||
| 25 | void Start() override; | ||
| 26 | void ObtainContext() override; | ||
| 27 | void ReleaseContext() override; | ||
| 28 | void PushGPUEntries(Tegra::CommandList&& entries) override; | ||
| 29 | void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override; | ||
| 30 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | ||
| 31 | void FlushRegion(VAddr addr, u64 size) override; | ||
| 32 | void InvalidateRegion(VAddr addr, u64 size) override; | ||
| 33 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | ||
| 34 | void WaitIdle() const override {} | ||
| 35 | |||
| 36 | protected: | ||
| 37 | void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, | ||
| 38 | [[maybe_unused]] u32 value) const override {} | ||
| 39 | }; | ||
| 40 | |||
| 41 | } // namespace VideoCommon | ||
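With gpu_asynch.* and gpu_synch.* deleted, both execution modes are served by a single GPU implementation that forwards to GPUThread::ThreadManager, now constructed with an is_async flag (see the gpu_thread.cpp hunks below). A minimal self-contained sketch of that pattern, with one command queue whose producer blocks per command in synchronous mode and lets a worker thread drain it in asynchronous mode; names are illustrative, not yuzu's real API:

    // Sketch only: the unified sync/async dispatch replacing GPUSynch/GPUAsynch.
    #include <condition_variable>
    #include <cstdint>
    #include <functional>
    #include <mutex>
    #include <queue>
    #include <thread>

    class ThreadManagerSketch {
    public:
        explicit ThreadManagerSketch(bool is_async_) : is_async{is_async_} {
            worker = std::thread([this] { Run(); });
        }
        ~ThreadManagerSketch() {
            {
                std::lock_guard lk{mutex};
                stop = true;
            }
            cv.notify_all();
            worker.join();
        }
        void Push(std::function<void()> command) {
            {
                std::lock_guard lk{mutex};
                queue.push(std::move(command));
                ++last_fence;
            }
            cv.notify_all();
            if (!is_async) {
                WaitIdle(); // synchronous mode: block until the command has run
            }
        }
        void WaitIdle() {
            std::unique_lock lk{mutex};
            idle_cv.wait(lk, [this] { return signaled_fence == last_fence; });
        }

    private:
        void Run() {
            std::unique_lock lk{mutex};
            while (true) {
                cv.wait(lk, [this] { return stop || !queue.empty(); });
                if (stop) {
                    return;
                }
                auto command = std::move(queue.front());
                queue.pop();
                lk.unlock();
                command(); // execute outside the lock, like the GPU thread's loop
                lk.lock();
                ++signaled_fence;
                idle_cv.notify_all();
            }
        }
        const bool is_async;
        bool stop = false;
        std::uint64_t last_fence = 0;
        std::uint64_t signaled_fence = 0;
        std::queue<std::function<void()>> queue;
        std::mutex mutex;
        std::condition_variable cv;
        std::condition_variable idle_cv;
        std::thread worker;
    };

The real ThreadManager uses a lock-free queue and fence counters rather than a mutex-guarded std::queue, but the blocking behaviour in synchronous mode is the same one added to PushCommand in the hunks below.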
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index e27218b96..7e490bcc3 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "common/scope_exit.h" | ||
| 7 | #include "common/thread.h" | 8 | #include "common/thread.h" |
| 8 | #include "core/core.h" | 9 | #include "core/core.h" |
| 9 | #include "core/frontend/emu_window.h" | 10 | #include "core/frontend/emu_window.h" |
| @@ -21,6 +22,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | |||
| 21 | SynchState& state, Tegra::CDmaPusher& cdma_pusher) { | 22 | SynchState& state, Tegra::CDmaPusher& cdma_pusher) { |
| 22 | std::string name = "yuzu:GPU"; | 23 | std::string name = "yuzu:GPU"; |
| 23 | MicroProfileOnThreadCreate(name.c_str()); | 24 | MicroProfileOnThreadCreate(name.c_str()); |
| 25 | SCOPE_EXIT({ MicroProfileOnThreadExit(); }); | ||
| 26 | |||
| 24 | Common::SetCurrentThreadName(name.c_str()); | 27 | Common::SetCurrentThreadName(name.c_str()); |
| 25 | Common::SetCurrentThreadPriority(Common::ThreadPriority::High); | 28 | Common::SetCurrentThreadPriority(Common::ThreadPriority::High); |
| 26 | system.RegisterHostThread(); | 29 | system.RegisterHostThread(); |
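SCOPE_EXIT guarantees MicroProfileOnThreadExit() runs on every exit path of RunThread, not only the fall-through return. The idiom behind the macro is a small RAII guard; a self-contained sketch (yuzu's actual macro lives in common/scope_exit.h and may differ in detail):

    #include <utility>

    template <typename Fn>
    class ScopeExit {
    public:
        explicit ScopeExit(Fn&& fn_) : fn{std::forward<Fn>(fn_)} {}
        ~ScopeExit() { fn(); } // runs on return, break, or exception unwind
        ScopeExit(const ScopeExit&) = delete;
        ScopeExit& operator=(const ScopeExit&) = delete;

    private:
        Fn fn;
    };

    // Usage mirroring the hunk above:
    //   ScopeExit guard{[] { MicroProfileOnThreadExit(); }};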
| @@ -65,7 +68,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | |||
| 65 | } | 68 | } |
| 66 | } | 69 | } |
| 67 | 70 | ||
| 68 | ThreadManager::ThreadManager(Core::System& system_) : system{system_} {} | 71 | ThreadManager::ThreadManager(Core::System& system_, bool is_async_) |
| 72 | : system{system_}, is_async{is_async_} {} | ||
| 69 | 73 | ||
| 70 | ThreadManager::~ThreadManager() { | 74 | ThreadManager::~ThreadManager() { |
| 71 | if (!thread.joinable()) { | 75 | if (!thread.joinable()) { |
| @@ -97,19 +101,30 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 97 | } | 101 | } |
| 98 | 102 | ||
| 99 | void ThreadManager::FlushRegion(VAddr addr, u64 size) { | 103 | void ThreadManager::FlushRegion(VAddr addr, u64 size) { |
| 100 | if (!Settings::IsGPULevelHigh()) { | 104 | if (!is_async) { |
| 105 | // Always flush in synchronous GPU mode | ||
| 101 | PushCommand(FlushRegionCommand(addr, size)); | 106 | PushCommand(FlushRegionCommand(addr, size)); |
| 102 | return; | 107 | return; |
| 103 | } | 108 | } |
| 104 | if (!Settings::IsGPULevelExtreme()) { | 109 | |
| 105 | return; | 110 | // Asynchronous GPU mode |
| 106 | } | 111 | switch (Settings::values.gpu_accuracy.GetValue()) { |
| 107 | if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) { | 112 | case Settings::GPUAccuracy::Normal: |
| 113 | PushCommand(FlushRegionCommand(addr, size)); | ||
| 114 | break; | ||
| 115 | case Settings::GPUAccuracy::High: | ||
| 116 | // TODO(bunnei): Is this right? Preserving existing behavior for now | ||
| 117 | break; | ||
| 118 | case Settings::GPUAccuracy::Extreme: { | ||
| 108 | auto& gpu = system.GPU(); | 119 | auto& gpu = system.GPU(); |
| 109 | u64 fence = gpu.RequestFlush(addr, size); | 120 | u64 fence = gpu.RequestFlush(addr, size); |
| 110 | PushCommand(GPUTickCommand()); | 121 | PushCommand(GPUTickCommand()); |
| 111 | while (fence > gpu.CurrentFlushRequestFence()) { | 122 | while (fence > gpu.CurrentFlushRequestFence()) { |
| 112 | } | 123 | } |
| 124 | break; | ||
| 125 | } | ||
| 126 | default: | ||
| 127 | UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue()); | ||
| 113 | } | 128 | } |
| 114 | } | 129 | } |
| 115 | 130 | ||
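The Extreme branch above is a flush handshake: the caller takes a fence from RequestFlush, pushes a GPUTickCommand to wake the GPU thread, then busy-waits until CurrentFlushRequestFence catches up. Distilled into a self-contained form (single caller assumed; the real code also records the address range for the GPU thread to flush):

    #include <atomic>
    #include <cstdint>

    std::atomic<std::uint64_t> current_flush_fence{0}; // advanced by the GPU thread
    std::uint64_t next_flush_fence = 0;                // owned by the calling thread

    std::uint64_t RequestFlush() {
        return ++next_flush_fence;
    }

    void WaitForFlush(std::uint64_t fence) {
        while (fence > current_flush_fence.load(std::memory_order_acquire)) {
            // Busy-wait, matching the empty while loop in FlushRegion above.
        }
    }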
| @@ -123,7 +138,8 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | |||
| 123 | } | 138 | } |
| 124 | 139 | ||
| 125 | void ThreadManager::WaitIdle() const { | 140 | void ThreadManager::WaitIdle() const { |
| 126 | while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { | 141 | while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) && |
| 142 | system.IsPoweredOn()) { | ||
| 127 | } | 143 | } |
| 128 | } | 144 | } |
| 129 | 145 | ||
| @@ -134,6 +150,12 @@ void ThreadManager::OnCommandListEnd() { | |||
| 134 | u64 ThreadManager::PushCommand(CommandData&& command_data) { | 150 | u64 ThreadManager::PushCommand(CommandData&& command_data) { |
| 135 | const u64 fence{++state.last_fence}; | 151 | const u64 fence{++state.last_fence}; |
| 136 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | 152 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); |
| 153 | |||
| 154 | if (!is_async) { | ||
| 155 | // In synchronous GPU mode, block the caller until the command has executed | ||
| 156 | WaitIdle(); | ||
| 157 | } | ||
| 158 | |||
| 137 | return fence; | 159 | return fence; |
| 138 | } | 160 | } |
| 139 | 161 | ||
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index f1c52cd9e..2775629e7 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -10,8 +10,9 @@ | |||
| 10 | #include <optional> | 10 | #include <optional> |
| 11 | #include <thread> | 11 | #include <thread> |
| 12 | #include <variant> | 12 | #include <variant> |
| 13 | |||
| 13 | #include "common/threadsafe_queue.h" | 14 | #include "common/threadsafe_queue.h" |
| 14 | #include "video_core/gpu.h" | 15 | #include "video_core/framebuffer_config.h" |
| 15 | 16 | ||
| 16 | namespace Tegra { | 17 | namespace Tegra { |
| 17 | struct FramebufferConfig; | 18 | struct FramebufferConfig; |
| @@ -25,6 +26,10 @@ class GraphicsContext; | |||
| 25 | class System; | 26 | class System; |
| 26 | } // namespace Core | 27 | } // namespace Core |
| 27 | 28 | ||
| 29 | namespace VideoCore { | ||
| 30 | class RendererBase; | ||
| 31 | } // namespace VideoCore | ||
| 32 | |||
| 28 | namespace VideoCommon::GPUThread { | 33 | namespace VideoCommon::GPUThread { |
| 29 | 34 | ||
| 30 | /// Command to signal to the GPU thread that processing has ended | 35 | /// Command to signal to the GPU thread that processing has ended |
| @@ -112,7 +117,7 @@ struct SynchState final { | |||
| 112 | /// Class used to manage the GPU thread | 117 | /// Class used to manage the GPU thread |
| 113 | class ThreadManager final { | 118 | class ThreadManager final { |
| 114 | public: | 119 | public: |
| 115 | explicit ThreadManager(Core::System& system_); | 120 | explicit ThreadManager(Core::System& system_, bool is_async_); |
| 116 | ~ThreadManager(); | 121 | ~ThreadManager(); |
| 117 | 122 | ||
| 118 | /// Creates and starts the GPU thread. | 123 | /// Creates and starts the GPU thread. |
| @@ -150,6 +155,7 @@ private: | |||
| 150 | Core::System& system; | 155 | Core::System& system; |
| 151 | std::thread thread; | 156 | std::thread thread; |
| 152 | std::thread::id thread_id; | 157 | std::thread::id thread_id; |
| 158 | const bool is_async; | ||
| 153 | }; | 159 | }; |
| 154 | 160 | ||
| 155 | } // namespace VideoCommon::GPUThread | 161 | } // namespace VideoCommon::GPUThread |
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index c157724a9..4c7399d5a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -1,8 +1,26 @@ | |||
| 1 | set(SHADER_SOURCES | 1 | set(SHADER_FILES |
| 2 | block_linear_unswizzle_2d.comp | ||
| 3 | block_linear_unswizzle_3d.comp | ||
| 4 | convert_depth_to_float.frag | ||
| 5 | convert_float_to_depth.frag | ||
| 6 | full_screen_triangle.vert | ||
| 7 | opengl_copy_bc4.comp | ||
| 2 | opengl_present.frag | 8 | opengl_present.frag |
| 3 | opengl_present.vert | 9 | opengl_present.vert |
| 10 | pitch_unswizzle.comp | ||
| 11 | vulkan_blit_color_float.frag | ||
| 12 | vulkan_blit_depth_stencil.frag | ||
| 13 | vulkan_present.frag | ||
| 14 | vulkan_present.vert | ||
| 15 | vulkan_quad_array.comp | ||
| 16 | vulkan_quad_indexed.comp | ||
| 17 | vulkan_uint8.comp | ||
| 4 | ) | 18 | ) |
| 5 | 19 | ||
| 20 | find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) | ||
| 21 | |||
| 22 | set(GLSL_FLAGS "") | ||
| 23 | |||
| 6 | set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) | 24 | set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) |
| 7 | set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders) | 25 | set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders) |
| 8 | set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) | 26 | set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) |
| @@ -10,27 +28,44 @@ set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) | |||
| 10 | set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in) | 28 | set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in) |
| 11 | set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) | 29 | set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) |
| 12 | 30 | ||
| 13 | foreach(FILENAME IN ITEMS ${SHADER_SOURCES}) | 31 | foreach(FILENAME IN ITEMS ${SHADER_FILES}) |
| 14 | string(REPLACE "." "_" SHADER_NAME ${FILENAME}) | 32 | string(REPLACE "." "_" SHADER_NAME ${FILENAME}) |
| 15 | set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) | 33 | set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) |
| 16 | set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) | 34 | # Skip generating source headers for Vulkan-exclusive files |
| 17 | add_custom_command( | 35 | if (NOT ${FILENAME} MATCHES "vulkan.*") |
| 18 | OUTPUT | 36 | set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) |
| 19 | ${HEADER_FILE} | 37 | add_custom_command( |
| 20 | COMMAND | 38 | OUTPUT |
| 21 | ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE} | 39 | ${SOURCE_HEADER_FILE} |
| 22 | MAIN_DEPENDENCY | 40 | COMMAND |
| 23 | ${SOURCE_FILE} | 41 | ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} |
| 24 | DEPENDS | 42 | MAIN_DEPENDENCY |
| 25 | ${INPUT_FILE} | 43 | ${SOURCE_FILE} |
| 26 | # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified | 44 | DEPENDS |
| 27 | ) | 45 | ${INPUT_FILE} |
| 28 | set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE}) | 46 | # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified |
| 47 | ) | ||
| 48 | set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) | ||
| 49 | endif() | ||
| 50 | # Skip compiling OpenGL-exclusive files to SPIR-V | ||
| 51 | if (NOT ${FILENAME} MATCHES "opengl.*") | ||
| 52 | string(TOUPPER ${SHADER_NAME}_SPV SPIRV_VARIABLE_NAME) | ||
| 53 | set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_spv.h) | ||
| 54 | add_custom_command( | ||
| 55 | OUTPUT | ||
| 56 | ${SPIRV_HEADER_FILE} | ||
| 57 | COMMAND | ||
| 58 | ${GLSLANGVALIDATOR} -V ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} | ||
| 59 | MAIN_DEPENDENCY | ||
| 60 | ${SOURCE_FILE} | ||
| 61 | ) | ||
| 62 | set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE}) | ||
| 63 | endif() | ||
| 29 | endforeach() | 64 | endforeach() |
| 30 | 65 | ||
| 31 | add_custom_target(host_shaders | 66 | add_custom_target(host_shaders |
| 32 | DEPENDS | 67 | DEPENDS |
| 33 | ${SHADER_HEADERS} | 68 | ${SHADER_HEADERS} |
| 34 | SOURCES | 69 | SOURCES |
| 35 | ${SHADER_SOURCES} | 70 | ${SHADER_FILES} |
| 36 | ) | 71 | ) |
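Each shader now yields up to two generated headers: a string header for sources compiled at runtime (produced by StringShaderHeader.cmake) and, for non-OpenGL files, a SPIR-V array header produced by glslangValidator's --variable-name option. The shapes sketched below are assumptions about that generated output, shown only for orientation; the actual identifiers come from ${SHADER_NAME} and ${SPIRV_VARIABLE_NAME}:

    // Assumed shape of opengl_present_frag.h (string header for runtime GLSL):
    //   constexpr std::string_view OPENGL_PRESENT_FRAG = R"(#version 430 core ...)";
    // Assumed shape of vulkan_present_frag_spv.h (glslangValidator output);
    // 0x07230203 is the SPIR-V magic number:
    //   const uint32_t VULKAN_PRESENT_FRAG_SPV[] = {0x07230203, /* ... */};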
diff --git a/src/video_core/host_shaders/block_linear_unswizzle_2d.comp b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp new file mode 100644 index 000000000..a131be79e --- /dev/null +++ b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp | |||
| @@ -0,0 +1,122 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 430 | ||
| 6 | |||
| 7 | #ifdef VULKAN | ||
| 8 | |||
| 9 | #extension GL_EXT_shader_16bit_storage : require | ||
| 10 | #extension GL_EXT_shader_8bit_storage : require | ||
| 11 | #define HAS_EXTENDED_TYPES 1 | ||
| 12 | #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { | ||
| 13 | #define END_PUSH_CONSTANTS }; | ||
| 14 | #define UNIFORM(n) | ||
| 15 | #define BINDING_SWIZZLE_BUFFER 0 | ||
| 16 | #define BINDING_INPUT_BUFFER 1 | ||
| 17 | #define BINDING_OUTPUT_IMAGE 2 | ||
| 18 | |||
| 19 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||
| 20 | |||
| 21 | #extension GL_NV_gpu_shader5 : enable | ||
| 22 | #ifdef GL_NV_gpu_shader5 | ||
| 23 | #define HAS_EXTENDED_TYPES 1 | ||
| 24 | #else | ||
| 25 | #define HAS_EXTENDED_TYPES 0 | ||
| 26 | #endif | ||
| 27 | #define BEGIN_PUSH_CONSTANTS | ||
| 28 | #define END_PUSH_CONSTANTS | ||
| 29 | #define UNIFORM(n) layout (location = n) uniform | ||
| 30 | #define BINDING_SWIZZLE_BUFFER 0 | ||
| 31 | #define BINDING_INPUT_BUFFER 1 | ||
| 32 | #define BINDING_OUTPUT_IMAGE 0 | ||
| 33 | |||
| 34 | #endif | ||
| 35 | |||
| 36 | BEGIN_PUSH_CONSTANTS | ||
| 37 | UNIFORM(0) uvec3 origin; | ||
| 38 | UNIFORM(1) ivec3 destination; | ||
| 39 | UNIFORM(2) uint bytes_per_block_log2; | ||
| 40 | UNIFORM(3) uint layer_stride; | ||
| 41 | UNIFORM(4) uint block_size; | ||
| 42 | UNIFORM(5) uint x_shift; | ||
| 43 | UNIFORM(6) uint block_height; | ||
| 44 | UNIFORM(7) uint block_height_mask; | ||
| 45 | END_PUSH_CONSTANTS | ||
| 46 | |||
| 47 | layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { | ||
| 48 | uint swizzle_table[]; | ||
| 49 | }; | ||
| 50 | |||
| 51 | #if HAS_EXTENDED_TYPES | ||
| 52 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; | ||
| 53 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; | ||
| 54 | #endif | ||
| 55 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; | ||
| 56 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; | ||
| 57 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; }; | ||
| 58 | |||
| 59 | layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage2DArray output_image; | ||
| 60 | |||
| 61 | layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; | ||
| 62 | |||
| 63 | const uint GOB_SIZE_X = 64; | ||
| 64 | const uint GOB_SIZE_Y = 8; | ||
| 65 | const uint GOB_SIZE_Z = 1; | ||
| 66 | const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; | ||
| 67 | |||
| 68 | const uint GOB_SIZE_X_SHIFT = 6; | ||
| 69 | const uint GOB_SIZE_Y_SHIFT = 3; | ||
| 70 | const uint GOB_SIZE_Z_SHIFT = 0; | ||
| 71 | const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; | ||
| 72 | |||
| 73 | const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); | ||
| 74 | |||
| 75 | uint SwizzleOffset(uvec2 pos) { | ||
| 76 | pos = pos & SWIZZLE_MASK; | ||
| 77 | return swizzle_table[pos.y * 64 + pos.x]; | ||
| 78 | } | ||
| 79 | |||
| 80 | uvec4 ReadTexel(uint offset) { | ||
| 81 | switch (bytes_per_block_log2) { | ||
| 82 | #if HAS_EXTENDED_TYPES | ||
| 83 | case 0: | ||
| 84 | return uvec4(u8data[offset], 0, 0, 0); | ||
| 85 | case 1: | ||
| 86 | return uvec4(u16data[offset / 2], 0, 0, 0); | ||
| 87 | #else | ||
| 88 | case 0: | ||
| 89 | return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); | ||
| 90 | case 1: | ||
| 91 | return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); | ||
| 92 | #endif | ||
| 93 | case 2: | ||
| 94 | return uvec4(u32data[offset / 4], 0, 0, 0); | ||
| 95 | case 3: | ||
| 96 | return uvec4(u64data[offset / 8], 0, 0); | ||
| 97 | case 4: | ||
| 98 | return u128data[offset / 16]; | ||
| 99 | } | ||
| 100 | return uvec4(0); | ||
| 101 | } | ||
| 102 | |||
| 103 | void main() { | ||
| 104 | uvec3 pos = gl_GlobalInvocationID + origin; | ||
| 105 | pos.x <<= bytes_per_block_log2; | ||
| 106 | |||
| 107 | // Read the swizzle table as soon as possible to hide its latency | ||
| 108 | const uint swizzle = SwizzleOffset(pos.xy); | ||
| 109 | |||
| 110 | const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; | ||
| 111 | |||
| 112 | uint offset = 0; | ||
| 113 | offset += pos.z * layer_stride; | ||
| 114 | offset += (block_y >> block_height) * block_size; | ||
| 115 | offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; | ||
| 116 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; | ||
| 117 | offset += swizzle; | ||
| 118 | |||
| 119 | const uvec4 texel = ReadTexel(offset); | ||
| 120 | const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; | ||
| 121 | imageStore(output_image, coord, texel); | ||
| 122 | } | ||
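The offset math above walks Tegra's block-linear layout: a GOB is 64 bytes wide by 8 rows (512 bytes, hence GOB_SIZE_SHIFT = 9), and a block stacks 2^block_height GOBs vertically. A CPU-side mirror of the same arithmetic, taking the shader's uniforms as parameters (x is already a byte coordinate, as after the pos.x <<= bytes_per_block_log2 shift; the 3D variant below adds two analogous z terms):

    #include <cstdint>

    constexpr std::uint32_t GOB_SIZE_X_SHIFT = 6; // 64-byte-wide GOBs
    constexpr std::uint32_t GOB_SIZE_Y_SHIFT = 3; // 8 rows per GOB
    constexpr std::uint32_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT;

    std::uint32_t BlockLinearOffset(std::uint32_t x, std::uint32_t y, std::uint32_t z,
                                    std::uint32_t layer_stride, std::uint32_t block_size,
                                    std::uint32_t x_shift, std::uint32_t block_height,
                                    std::uint32_t block_height_mask,
                                    std::uint32_t swizzle) {
        const std::uint32_t block_y = y >> GOB_SIZE_Y_SHIFT;
        std::uint32_t offset = z * layer_stride;                   // 2D array layer
        offset += (block_y >> block_height) * block_size;          // whole blocks above
        offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; // GOBs inside block
        offset += (x >> GOB_SIZE_X_SHIFT) << x_shift;              // GOB column
        return offset + swizzle;                                   // intra-GOB swizzle
    }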
diff --git a/src/video_core/host_shaders/block_linear_unswizzle_3d.comp b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp new file mode 100644 index 000000000..bb6872e6b --- /dev/null +++ b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp | |||
| @@ -0,0 +1,125 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 430 | ||
| 6 | |||
| 7 | #ifdef VULKAN | ||
| 8 | |||
| 9 | #extension GL_EXT_shader_16bit_storage : require | ||
| 10 | #extension GL_EXT_shader_8bit_storage : require | ||
| 11 | #define HAS_EXTENDED_TYPES 1 | ||
| 12 | #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { | ||
| 13 | #define END_PUSH_CONSTANTS }; | ||
| 14 | #define UNIFORM(n) | ||
| 15 | #define BINDING_SWIZZLE_BUFFER 0 | ||
| 16 | #define BINDING_INPUT_BUFFER 1 | ||
| 17 | #define BINDING_OUTPUT_IMAGE 2 | ||
| 18 | |||
| 19 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||
| 20 | |||
| 21 | #extension GL_NV_gpu_shader5 : enable | ||
| 22 | #ifdef GL_NV_gpu_shader5 | ||
| 23 | #define HAS_EXTENDED_TYPES 1 | ||
| 24 | #else | ||
| 25 | #define HAS_EXTENDED_TYPES 0 | ||
| 26 | #endif | ||
| 27 | #define BEGIN_PUSH_CONSTANTS | ||
| 28 | #define END_PUSH_CONSTANTS | ||
| 29 | #define UNIFORM(n) layout (location = n) uniform | ||
| 30 | #define BINDING_SWIZZLE_BUFFER 0 | ||
| 31 | #define BINDING_INPUT_BUFFER 1 | ||
| 32 | #define BINDING_OUTPUT_IMAGE 0 | ||
| 33 | |||
| 34 | #endif | ||
| 35 | |||
| 36 | BEGIN_PUSH_CONSTANTS | ||
| 37 | UNIFORM(0) uvec3 origin; | ||
| 38 | UNIFORM(1) ivec3 destination; | ||
| 39 | UNIFORM(2) uint bytes_per_block_log2; | ||
| 40 | UNIFORM(3) uint slice_size; | ||
| 41 | UNIFORM(4) uint block_size; | ||
| 42 | UNIFORM(5) uint x_shift; | ||
| 43 | UNIFORM(6) uint block_height; | ||
| 44 | UNIFORM(7) uint block_height_mask; | ||
| 45 | UNIFORM(8) uint block_depth; | ||
| 46 | UNIFORM(9) uint block_depth_mask; | ||
| 47 | END_PUSH_CONSTANTS | ||
| 48 | |||
| 49 | layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { | ||
| 50 | uint swizzle_table[]; | ||
| 51 | }; | ||
| 52 | |||
| 53 | #if HAS_EXTENDED_TYPES | ||
| 54 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; | ||
| 55 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; | ||
| 56 | #endif | ||
| 57 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; | ||
| 58 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; | ||
| 59 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; }; | ||
| 60 | |||
| 61 | layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage3D output_image; | ||
| 62 | |||
| 63 | layout(local_size_x = 16, local_size_y = 8, local_size_z = 8) in; | ||
| 64 | |||
| 65 | const uint GOB_SIZE_X = 64; | ||
| 66 | const uint GOB_SIZE_Y = 8; | ||
| 67 | const uint GOB_SIZE_Z = 1; | ||
| 68 | const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; | ||
| 69 | |||
| 70 | const uint GOB_SIZE_X_SHIFT = 6; | ||
| 71 | const uint GOB_SIZE_Y_SHIFT = 3; | ||
| 72 | const uint GOB_SIZE_Z_SHIFT = 0; | ||
| 73 | const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; | ||
| 74 | |||
| 75 | const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); | ||
| 76 | |||
| 77 | uint SwizzleOffset(uvec2 pos) { | ||
| 78 | pos = pos & SWIZZLE_MASK; | ||
| 79 | return swizzle_table[pos.y * 64 + pos.x]; | ||
| 80 | } | ||
| 81 | |||
| 82 | uvec4 ReadTexel(uint offset) { | ||
| 83 | switch (bytes_per_block_log2) { | ||
| 84 | #if HAS_EXTENDED_TYPES | ||
| 85 | case 0: | ||
| 86 | return uvec4(u8data[offset], 0, 0, 0); | ||
| 87 | case 1: | ||
| 88 | return uvec4(u16data[offset / 2], 0, 0, 0); | ||
| 89 | #else | ||
| 90 | case 0: | ||
| 91 | return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); | ||
| 92 | case 1: | ||
| 93 | return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); | ||
| 94 | #endif | ||
| 95 | case 2: | ||
| 96 | return uvec4(u32data[offset / 4], 0, 0, 0); | ||
| 97 | case 3: | ||
| 98 | return uvec4(u64data[offset / 8], 0, 0); | ||
| 99 | case 4: | ||
| 100 | return u128data[offset / 16]; | ||
| 101 | } | ||
| 102 | return uvec4(0); | ||
| 103 | } | ||
| 104 | |||
| 105 | void main() { | ||
| 106 | uvec3 pos = gl_GlobalInvocationID + origin; | ||
| 107 | pos.x <<= bytes_per_block_log2; | ||
| 108 | |||
| 109 | // Read the swizzle table as soon as possible to hide its latency | ||
| 110 | const uint swizzle = SwizzleOffset(pos.xy); | ||
| 111 | |||
| 112 | const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; | ||
| 113 | |||
| 114 | uint offset = 0; | ||
| 115 | offset += (pos.z >> block_depth) * slice_size; | ||
| 116 | offset += (pos.z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height); | ||
| 117 | offset += (block_y >> block_height) * block_size; | ||
| 118 | offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; | ||
| 119 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; | ||
| 120 | offset += swizzle; | ||
| 121 | |||
| 122 | const uvec4 texel = ReadTexel(offset); | ||
| 123 | const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; | ||
| 124 | imageStore(output_image, coord, texel); | ||
| 125 | } | ||
diff --git a/src/video_core/host_shaders/convert_depth_to_float.frag b/src/video_core/host_shaders/convert_depth_to_float.frag new file mode 100644 index 000000000..624c58509 --- /dev/null +++ b/src/video_core/host_shaders/convert_depth_to_float.frag | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 450 | ||
| 6 | |||
| 7 | layout(binding = 0) uniform sampler2D depth_texture; | ||
| 8 | layout(location = 0) out float output_color; | ||
| 9 | |||
| 10 | void main() { | ||
| 11 | ivec2 coord = ivec2(gl_FragCoord.xy); | ||
| 12 | output_color = texelFetch(depth_texture, coord, 0).r; | ||
| 13 | } | ||
diff --git a/src/video_core/host_shaders/convert_float_to_depth.frag b/src/video_core/host_shaders/convert_float_to_depth.frag new file mode 100644 index 000000000..d86c795f4 --- /dev/null +++ b/src/video_core/host_shaders/convert_float_to_depth.frag | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 450 | ||
| 6 | |||
| 7 | layout(binding = 0) uniform sampler2D color_texture; | ||
| 8 | |||
| 9 | void main() { | ||
| 10 | ivec2 coord = ivec2(gl_FragCoord.xy); | ||
| 11 | float color = texelFetch(color_texture, coord, 0).r; | ||
| 12 | gl_FragDepth = color; | ||
| 13 | } | ||
diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert new file mode 100644 index 000000000..452ad6502 --- /dev/null +++ b/src/video_core/host_shaders/full_screen_triangle.vert | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 450 | ||
| 6 | |||
| 7 | #ifdef VULKAN | ||
| 8 | #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { | ||
| 9 | #define END_PUSH_CONSTANTS }; | ||
| 10 | #define UNIFORM(n) | ||
| 11 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||
| 12 | #define BEGIN_PUSH_CONSTANTS | ||
| 13 | #define END_PUSH_CONSTANTS | ||
| 14 | #define UNIFORM(n) layout (location = n) uniform | ||
| 15 | #endif | ||
| 16 | |||
| 17 | BEGIN_PUSH_CONSTANTS | ||
| 18 | UNIFORM(0) vec2 tex_scale; | ||
| 19 | UNIFORM(1) vec2 tex_offset; | ||
| 20 | END_PUSH_CONSTANTS | ||
| 21 | |||
| 22 | layout(location = 0) out vec2 texcoord; | ||
| 23 | |||
| 24 | void main() { | ||
| 25 | float x = float((gl_VertexIndex & 1) << 2); | ||
| 26 | float y = float((gl_VertexIndex & 2) << 1); | ||
| 27 | gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); | ||
| 28 | texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset); | ||
| 29 | } | ||
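The bit tricks above generate one oversized triangle from gl_VertexIndex alone, with no vertex buffer: indices 0, 1 and 2 land on clip-space positions (-1,-1), (3,-1) and (-1,3), whose intersection with the [-1,1] viewport covers the full screen. A quick self-contained check of that arithmetic:

    #include <cstdio>

    int main() {
        for (int index = 0; index < 3; ++index) {
            const float x = static_cast<float>((index & 1) << 2);
            const float y = static_cast<float>((index & 2) << 1);
            std::printf("vertex %d: (%g, %g)\n", index, x - 1.0f, y - 1.0f);
        }
        // Prints: vertex 0: (-1, -1), vertex 1: (3, -1), vertex 2: (-1, 3)
    }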
diff --git a/src/video_core/host_shaders/opengl_copy_bc4.comp b/src/video_core/host_shaders/opengl_copy_bc4.comp new file mode 100644 index 000000000..7b8e20fbe --- /dev/null +++ b/src/video_core/host_shaders/opengl_copy_bc4.comp | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 430 core | ||
| 6 | #extension GL_ARB_gpu_shader_int64 : require | ||
| 7 | |||
| 8 | layout (local_size_x = 4, local_size_y = 4) in; | ||
| 9 | |||
| 10 | layout(binding = 0, rg32ui) readonly uniform uimage3D bc4_input; | ||
| 11 | layout(binding = 1, rgba8ui) writeonly uniform uimage3D bc4_output; | ||
| 12 | |||
| 13 | layout(location = 0) uniform uvec3 src_offset; | ||
| 14 | layout(location = 1) uniform uvec3 dst_offset; | ||
| 15 | |||
| 16 | // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt | ||
| 17 | uint DecompressBlock(uint64_t bits, uvec2 coord) { | ||
| 18 | const uint code_offset = 16 + 3 * (4 * coord.y + coord.x); | ||
| 19 | const uint code = uint(bits >> code_offset) & 7; | ||
| 20 | const uint red0 = uint(bits >> 0) & 0xff; | ||
| 21 | const uint red1 = uint(bits >> 8) & 0xff; | ||
| 22 | if (red0 > red1) { | ||
| 23 | switch (code) { | ||
| 24 | case 0: | ||
| 25 | return red0; | ||
| 26 | case 1: | ||
| 27 | return red1; | ||
| 28 | case 2: | ||
| 29 | return (6 * red0 + 1 * red1) / 7; | ||
| 30 | case 3: | ||
| 31 | return (5 * red0 + 2 * red1) / 7; | ||
| 32 | case 4: | ||
| 33 | return (4 * red0 + 3 * red1) / 7; | ||
| 34 | case 5: | ||
| 35 | return (3 * red0 + 4 * red1) / 7; | ||
| 36 | case 6: | ||
| 37 | return (2 * red0 + 5 * red1) / 7; | ||
| 38 | case 7: | ||
| 39 | return (1 * red0 + 6 * red1) / 7; | ||
| 40 | } | ||
| 41 | } else { | ||
| 42 | switch (code) { | ||
| 43 | case 0: | ||
| 44 | return red0; | ||
| 45 | case 1: | ||
| 46 | return red1; | ||
| 47 | case 2: | ||
| 48 | return (4 * red0 + 1 * red1) / 5; | ||
| 49 | case 3: | ||
| 50 | return (3 * red0 + 2 * red1) / 5; | ||
| 51 | case 4: | ||
| 52 | return (2 * red0 + 3 * red1) / 5; | ||
| 53 | case 5: | ||
| 54 | return (1 * red0 + 4 * red1) / 5; | ||
| 55 | case 6: | ||
| 56 | return 0; | ||
| 57 | case 7: | ||
| 58 | return 0xff; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | return 0; | ||
| 62 | } | ||
| 63 | |||
| 64 | void main() { | ||
| 65 | uvec2 packed_bits = imageLoad(bc4_input, ivec3(gl_WorkGroupID + src_offset)).rg; | ||
| 66 | uint64_t bits = packUint2x32(packed_bits); | ||
| 67 | uint red = DecompressBlock(bits, gl_LocalInvocationID.xy); | ||
| 68 | uvec4 color = uvec4(red & 0xff, 0, 0, 0xff); | ||
| 69 | imageStore(bc4_output, ivec3(gl_GlobalInvocationID + dst_offset), color); | ||
| 70 | } | ||
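The two switch ladders above implement the standard RGTC/BC4 palettes: an eight-entry interpolated ramp when red0 > red1, and a six-entry ramp plus hard 0 and 255 otherwise. The same tables collapse to closed form; a self-contained reference with one worked value (red0 = 200, red1 = 40, code = 2 gives (6*200 + 40)/7 = 177):

    #include <cassert>
    #include <cstdint>

    std::uint32_t DecodeBC4Texel(std::uint64_t bits, std::uint32_t x, std::uint32_t y) {
        const std::uint32_t code =
            static_cast<std::uint32_t>(bits >> (16 + 3 * (4 * y + x))) & 7;
        const std::uint32_t red0 = static_cast<std::uint32_t>(bits >> 0) & 0xff;
        const std::uint32_t red1 = static_cast<std::uint32_t>(bits >> 8) & 0xff;
        if (code == 0) {
            return red0;
        }
        if (code == 1) {
            return red1;
        }
        if (red0 > red1) {
            return ((8 - code) * red0 + (code - 1) * red1) / 7; // 7-step ramp
        }
        if (code == 6) {
            return 0;
        }
        if (code == 7) {
            return 0xff;
        }
        return ((6 - code) * red0 + (code - 1) * red1) / 5; // 5-step ramp
    }

    int main() {
        // red0 = 200, red1 = 40, texel (0, 0) uses the 3-bit code at bit 16.
        const std::uint64_t bits = 200ull | (40ull << 8) | (2ull << 16);
        assert(DecodeBC4Texel(bits, 0, 0) == 177);
    }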
diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag index 8a4cb024b..84b818227 100644 --- a/src/video_core/host_shaders/opengl_present.frag +++ b/src/video_core/host_shaders/opengl_present.frag | |||
| @@ -1,3 +1,7 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 1 | #version 430 core | 5 | #version 430 core |
| 2 | 6 | ||
| 3 | layout (location = 0) in vec2 frag_tex_coord; | 7 | layout (location = 0) in vec2 frag_tex_coord; |
diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert index 2235d31a4..c3b5adbba 100644 --- a/src/video_core/host_shaders/opengl_present.vert +++ b/src/video_core/host_shaders/opengl_present.vert | |||
| @@ -1,3 +1,7 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 1 | #version 430 core | 5 | #version 430 core |
| 2 | 6 | ||
| 3 | out gl_PerVertex { | 7 | out gl_PerVertex { |
diff --git a/src/video_core/host_shaders/pitch_unswizzle.comp b/src/video_core/host_shaders/pitch_unswizzle.comp new file mode 100644 index 000000000..cb48ec170 --- /dev/null +++ b/src/video_core/host_shaders/pitch_unswizzle.comp | |||
| @@ -0,0 +1,86 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 430 | ||
| 6 | |||
| 7 | #ifdef VULKAN | ||
| 8 | |||
| 9 | #extension GL_EXT_shader_16bit_storage : require | ||
| 10 | #extension GL_EXT_shader_8bit_storage : require | ||
| 11 | #define HAS_EXTENDED_TYPES 1 | ||
| 12 | #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { | ||
| 13 | #define END_PUSH_CONSTANTS }; | ||
| 14 | #define UNIFORM(n) | ||
| 15 | #define BINDING_INPUT_BUFFER 0 | ||
| 16 | #define BINDING_OUTPUT_IMAGE 1 | ||
| 17 | |||
| 18 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||
| 19 | |||
| 20 | #extension GL_NV_gpu_shader5 : enable | ||
| 21 | #ifdef GL_NV_gpu_shader5 | ||
| 22 | #define HAS_EXTENDED_TYPES 1 | ||
| 23 | #else | ||
| 24 | #define HAS_EXTENDED_TYPES 0 | ||
| 25 | #endif | ||
| 26 | #define BEGIN_PUSH_CONSTANTS | ||
| 27 | #define END_PUSH_CONSTANTS | ||
| 28 | #define UNIFORM(n) layout (location = n) uniform | ||
| 29 | #define BINDING_INPUT_BUFFER 0 | ||
| 30 | #define BINDING_OUTPUT_IMAGE 0 | ||
| 31 | |||
| 32 | #endif | ||
| 33 | |||
| 34 | BEGIN_PUSH_CONSTANTS | ||
| 35 | UNIFORM(0) uvec2 origin; | ||
| 36 | UNIFORM(1) ivec2 destination; | ||
| 37 | UNIFORM(2) uint bytes_per_block; | ||
| 38 | UNIFORM(3) uint pitch; | ||
| 39 | END_PUSH_CONSTANTS | ||
| 40 | |||
| 41 | #if HAS_EXTENDED_TYPES | ||
| 42 | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU8 { uint8_t u8data[]; }; | ||
| 43 | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU16 { uint16_t u16data[]; }; | ||
| 44 | #endif | ||
| 45 | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uint u32data[]; }; | ||
| 46 | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU64 { uvec2 u64data[]; }; | ||
| 47 | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU128 { uvec4 u128data[]; }; | ||
| 48 | |||
| 49 | layout(binding = BINDING_OUTPUT_IMAGE) writeonly uniform uimage2D output_image; | ||
| 50 | |||
| 51 | layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; | ||
| 52 | |||
| 53 | uvec4 ReadTexel(uint offset) { | ||
| 54 | switch (bytes_per_block) { | ||
| 55 | #if HAS_EXTENDED_TYPES | ||
| 56 | case 1: | ||
| 57 | return uvec4(u8data[offset], 0, 0, 0); | ||
| 58 | case 2: | ||
| 59 | return uvec4(u16data[offset / 2], 0, 0, 0); | ||
| 60 | #else | ||
| 61 | case 1: | ||
| 62 | return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); | ||
| 63 | case 2: | ||
| 64 | return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); | ||
| 65 | #endif | ||
| 66 | case 4: | ||
| 67 | return uvec4(u32data[offset / 4], 0, 0, 0); | ||
| 68 | case 8: | ||
| 69 | return uvec4(u64data[offset / 8], 0, 0); | ||
| 70 | case 16: | ||
| 71 | return u128data[offset / 16]; | ||
| 72 | } | ||
| 73 | return uvec4(0); | ||
| 74 | } | ||
| 75 | |||
| 76 | void main() { | ||
| 77 | uvec2 pos = gl_GlobalInvocationID.xy + origin; | ||
| 78 | |||
| 79 | uint offset = 0; | ||
| 80 | offset += pos.x * bytes_per_block; | ||
| 81 | offset += pos.y * pitch; | ||
| 82 | |||
| 83 | const uvec4 texel = ReadTexel(offset); | ||
| 84 | const ivec2 coord = ivec2(gl_GlobalInvocationID.xy) + destination; | ||
| 85 | imageStore(output_image, coord, texel); | ||
| 86 | } | ||
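Unlike the block-linear shaders, pitch-linear addressing above is plain row-major: offset = y * pitch + x * bytes_per_block. With pitch = 1024 and 4-byte texels, texel (8, 3) reads from 3*1024 + 8*4 = 3104. A one-function check:

    #include <cassert>
    #include <cstdint>

    std::uint32_t PitchLinearOffset(std::uint32_t x, std::uint32_t y,
                                    std::uint32_t bytes_per_block, std::uint32_t pitch) {
        return y * pitch + x * bytes_per_block; // mirrors pitch_unswizzle.comp
    }

    int main() {
        assert(PitchLinearOffset(8, 3, 4, 1024) == 3104);
    }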
diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/vulkan_blit_color_float.frag new file mode 100644 index 000000000..4a6aae410 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_blit_color_float.frag | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 450 | ||
| 6 | |||
| 7 | layout(binding = 0) uniform sampler2D tex; | ||
| 8 | |||
| 9 | layout(location = 0) in vec2 texcoord; | ||
| 10 | layout(location = 0) out vec4 color; | ||
| 11 | |||
| 12 | void main() { | ||
| 13 | color = textureLod(tex, texcoord, 0); | ||
| 14 | } | ||
diff --git a/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag new file mode 100644 index 000000000..19bb23a5a --- /dev/null +++ b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag | |||
| @@ -0,0 +1,16 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 450 | ||
| 6 | #extension GL_ARB_shader_stencil_export : require | ||
| 7 | |||
| 8 | layout(binding = 0) uniform sampler2D depth_tex; | ||
| 9 | layout(binding = 1) uniform isampler2D stencil_tex; | ||
| 10 | |||
| 11 | layout(location = 0) in vec2 texcoord; | ||
| 12 | |||
| 13 | void main() { | ||
| 14 | gl_FragDepth = textureLod(depth_tex, texcoord, 0).r; | ||
| 15 | gl_FragStencilRefARB = textureLod(stencil_tex, texcoord, 0).r; | ||
| 16 | } | ||
diff --git a/src/video_core/renderer_vulkan/shaders/blit.frag b/src/video_core/host_shaders/vulkan_present.frag index a06ecd24a..0979ff3e6 100644 --- a/src/video_core/renderer_vulkan/shaders/blit.frag +++ b/src/video_core/host_shaders/vulkan_present.frag | |||
| @@ -2,15 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | 5 | #version 460 core |
| 15 | 6 | ||
| 16 | layout (location = 0) in vec2 frag_tex_coord; | 7 | layout (location = 0) in vec2 frag_tex_coord; |
diff --git a/src/video_core/renderer_vulkan/shaders/blit.vert b/src/video_core/host_shaders/vulkan_present.vert index c64d9235a..00b868958 100644 --- a/src/video_core/renderer_vulkan/shaders/blit.vert +++ b/src/video_core/host_shaders/vulkan_present.vert | |||
| @@ -2,15 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | 5 | #version 460 core |
| 15 | 6 | ||
| 16 | layout (location = 0) in vec2 vert_position; | 7 | layout (location = 0) in vec2 vert_position; |
diff --git a/src/video_core/renderer_vulkan/shaders/quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp index 5a5703308..212f4e998 100644 --- a/src/video_core/renderer_vulkan/shaders/quad_array.comp +++ b/src/video_core/host_shaders/vulkan_quad_array.comp | |||
| @@ -2,15 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | 5 | #version 460 core |
| 15 | 6 | ||
| 16 | layout (local_size_x = 1024) in; | 7 | layout (local_size_x = 1024) in; |
diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/host_shaders/vulkan_quad_indexed.comp index 5a472ba9b..8655591d0 100644 --- a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp +++ b/src/video_core/host_shaders/vulkan_quad_indexed.comp | |||
| @@ -2,15 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V quad_indexed.comp -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | 5 | #version 460 core |
| 15 | 6 | ||
| 16 | layout (local_size_x = 1024) in; | 7 | layout (local_size_x = 1024) in; |
diff --git a/src/video_core/renderer_vulkan/shaders/uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp index a320f3ae0..ad74d7af9 100644 --- a/src/video_core/renderer_vulkan/shaders/uint8.comp +++ b/src/video_core/host_shaders/vulkan_uint8.comp | |||
| @@ -2,15 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | 5 | #version 460 core |
| 15 | #extension GL_EXT_shader_16bit_storage : require | 6 | #extension GL_EXT_shader_16bit_storage : require |
| 16 | #extension GL_EXT_shader_8bit_storage : require | 7 | #extension GL_EXT_shader_8bit_storage : require |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 6e70bd362..65feff588 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -57,7 +57,10 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { | |||
| 57 | } | 57 | } |
| 58 | 58 | ||
| 59 | // Flush and invalidate through the GPU interface, to be asynchronous if possible. | 59 | // Flush and invalidate through the GPU interface, to be asynchronous if possible. |
| 60 | system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size); | 60 | const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr); |
| 61 | ASSERT(cpu_addr); | ||
| 62 | |||
| 63 | rasterizer->UnmapMemory(*cpu_addr, size); | ||
| 61 | 64 | ||
| 62 | UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); | 65 | UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); |
| 63 | } | 66 | } |
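The Unmap change replaces an unchecked dereference of GpuToCpuAddress with an explicit optional check before handing the range to the rasterizer. A sketch of the pattern using standard-library stand-ins (assert replaces yuzu's ASSERT macro; the lookup itself is assumed, not implemented):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <optional>

    using VAddr = std::uint64_t;
    using GPUVAddr = std::uint64_t;

    std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); // assumed lookup

    void Unmap(GPUVAddr gpu_addr, std::size_t size) {
        const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
        assert(cpu_addr.has_value()); // fail loudly instead of dereferencing blindly
        // rasterizer->UnmapMemory(*cpu_addr, size);
    }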
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 9da9fb4ff..e69de29bb 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp | |||
| @@ -1,250 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <cstring> | ||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/morton.h" | ||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/textures/decoders.h" | ||
| 12 | |||
| 13 | namespace VideoCore { | ||
| 14 | |||
| 15 | using Surface::GetBytesPerPixel; | ||
| 16 | using Surface::PixelFormat; | ||
| 17 | |||
| 18 | using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*); | ||
| 19 | using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; | ||
| 20 | |||
| 21 | template <bool morton_to_linear, PixelFormat format> | ||
| 22 | static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, | ||
| 23 | u32 tile_width_spacing, u8* buffer, u8* addr) { | ||
| 24 | constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); | ||
| 25 | |||
| 26 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual | ||
| 27 | // pixel values. | ||
| 28 | constexpr u32 tile_size_x{GetDefaultBlockWidth(format)}; | ||
| 29 | constexpr u32 tile_size_y{GetDefaultBlockHeight(format)}; | ||
| 30 | |||
| 31 | if constexpr (morton_to_linear) { | ||
| 32 | Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, | ||
| 33 | stride, height, depth, block_height, block_depth, | ||
| 34 | tile_width_spacing); | ||
| 35 | } else { | ||
| 36 | Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, | ||
| 37 | (height + tile_size_y - 1) / tile_size_y, depth, | ||
| 38 | bytes_per_pixel, bytes_per_pixel, addr, buffer, false, | ||
| 39 | block_height, block_depth, tile_width_spacing); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | static constexpr ConversionArray morton_to_linear_fns = { | ||
| 44 | MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>, | ||
| 45 | MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>, | ||
| 46 | MortonCopy<true, PixelFormat::A8B8G8R8_SINT>, | ||
| 47 | MortonCopy<true, PixelFormat::A8B8G8R8_UINT>, | ||
| 48 | MortonCopy<true, PixelFormat::R5G6B5_UNORM>, | ||
| 49 | MortonCopy<true, PixelFormat::B5G6R5_UNORM>, | ||
| 50 | MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>, | ||
| 51 | MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>, | ||
| 52 | MortonCopy<true, PixelFormat::A2B10G10R10_UINT>, | ||
| 53 | MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>, | ||
| 54 | MortonCopy<true, PixelFormat::R8_UNORM>, | ||
| 55 | MortonCopy<true, PixelFormat::R8_SNORM>, | ||
| 56 | MortonCopy<true, PixelFormat::R8_SINT>, | ||
| 57 | MortonCopy<true, PixelFormat::R8_UINT>, | ||
| 58 | MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>, | ||
| 59 | MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>, | ||
| 60 | MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>, | ||
| 61 | MortonCopy<true, PixelFormat::R16G16B16A16_SINT>, | ||
| 62 | MortonCopy<true, PixelFormat::R16G16B16A16_UINT>, | ||
| 63 | MortonCopy<true, PixelFormat::B10G11R11_FLOAT>, | ||
| 64 | MortonCopy<true, PixelFormat::R32G32B32A32_UINT>, | ||
| 65 | MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>, | ||
| 66 | MortonCopy<true, PixelFormat::BC2_UNORM>, | ||
| 67 | MortonCopy<true, PixelFormat::BC3_UNORM>, | ||
| 68 | MortonCopy<true, PixelFormat::BC4_UNORM>, | ||
| 69 | MortonCopy<true, PixelFormat::BC4_SNORM>, | ||
| 70 | MortonCopy<true, PixelFormat::BC5_UNORM>, | ||
| 71 | MortonCopy<true, PixelFormat::BC5_SNORM>, | ||
| 72 | MortonCopy<true, PixelFormat::BC7_UNORM>, | ||
| 73 | MortonCopy<true, PixelFormat::BC6H_UFLOAT>, | ||
| 74 | MortonCopy<true, PixelFormat::BC6H_SFLOAT>, | ||
| 75 | MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>, | ||
| 76 | MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>, | ||
| 77 | MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>, | ||
| 78 | MortonCopy<true, PixelFormat::R32G32B32A32_SINT>, | ||
| 79 | MortonCopy<true, PixelFormat::R32G32_FLOAT>, | ||
| 80 | MortonCopy<true, PixelFormat::R32G32_SINT>, | ||
| 81 | MortonCopy<true, PixelFormat::R32_FLOAT>, | ||
| 82 | MortonCopy<true, PixelFormat::R16_FLOAT>, | ||
| 83 | MortonCopy<true, PixelFormat::R16_UNORM>, | ||
| 84 | MortonCopy<true, PixelFormat::R16_SNORM>, | ||
| 85 | MortonCopy<true, PixelFormat::R16_UINT>, | ||
| 86 | MortonCopy<true, PixelFormat::R16_SINT>, | ||
| 87 | MortonCopy<true, PixelFormat::R16G16_UNORM>, | ||
| 88 | MortonCopy<true, PixelFormat::R16G16_FLOAT>, | ||
| 89 | MortonCopy<true, PixelFormat::R16G16_UINT>, | ||
| 90 | MortonCopy<true, PixelFormat::R16G16_SINT>, | ||
| 91 | MortonCopy<true, PixelFormat::R16G16_SNORM>, | ||
| 92 | MortonCopy<true, PixelFormat::R32G32B32_FLOAT>, | ||
| 93 | MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>, | ||
| 94 | MortonCopy<true, PixelFormat::R8G8_UNORM>, | ||
| 95 | MortonCopy<true, PixelFormat::R8G8_SNORM>, | ||
| 96 | MortonCopy<true, PixelFormat::R8G8_SINT>, | ||
| 97 | MortonCopy<true, PixelFormat::R8G8_UINT>, | ||
| 98 | MortonCopy<true, PixelFormat::R32G32_UINT>, | ||
| 99 | MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>, | ||
| 100 | MortonCopy<true, PixelFormat::R32_UINT>, | ||
| 101 | MortonCopy<true, PixelFormat::R32_SINT>, | ||
| 102 | MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>, | ||
| 103 | MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>, | ||
| 104 | MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>, | ||
| 105 | MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>, | ||
| 106 | MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>, | ||
| 107 | MortonCopy<true, PixelFormat::BC2_SRGB>, | ||
| 108 | MortonCopy<true, PixelFormat::BC3_SRGB>, | ||
| 109 | MortonCopy<true, PixelFormat::BC7_SRGB>, | ||
| 110 | MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>, | ||
| 111 | MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, | ||
| 112 | MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, | ||
| 113 | MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, | ||
| 114 | MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, | ||
| 115 | MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>, | ||
| 116 | MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, | ||
| 117 | MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>, | ||
| 118 | MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, | ||
| 119 | MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>, | ||
| 120 | MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>, | ||
| 121 | MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>, | ||
| 122 | MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>, | ||
| 123 | MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>, | ||
| 124 | MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>, | ||
| 125 | MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>, | ||
| 126 | MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>, | ||
| 127 | MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>, | ||
| 128 | MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>, | ||
| 129 | MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>, | ||
| 130 | MortonCopy<true, PixelFormat::D32_FLOAT>, | ||
| 131 | MortonCopy<true, PixelFormat::D16_UNORM>, | ||
| 132 | MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>, | ||
| 133 | MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>, | ||
| 134 | MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>, | ||
| 135 | }; | ||
| 136 | |||
| 137 | static constexpr ConversionArray linear_to_morton_fns = { | ||
| 138 | MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>, | ||
| 139 | MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>, | ||
| 140 | MortonCopy<false, PixelFormat::A8B8G8R8_SINT>, | ||
| 141 | MortonCopy<false, PixelFormat::A8B8G8R8_UINT>, | ||
| 142 | MortonCopy<false, PixelFormat::R5G6B5_UNORM>, | ||
| 143 | MortonCopy<false, PixelFormat::B5G6R5_UNORM>, | ||
| 144 | MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>, | ||
| 145 | MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>, | ||
| 146 | MortonCopy<false, PixelFormat::A2B10G10R10_UINT>, | ||
| 147 | MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>, | ||
| 148 | MortonCopy<false, PixelFormat::R8_UNORM>, | ||
| 149 | MortonCopy<false, PixelFormat::R8_SNORM>, | ||
| 150 | MortonCopy<false, PixelFormat::R8_SINT>, | ||
| 151 | MortonCopy<false, PixelFormat::R8_UINT>, | ||
| 152 | MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>, | ||
| 153 | MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>, | ||
| 154 | MortonCopy<false, PixelFormat::R16G16B16A16_SINT>, | ||
| 155 | MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>, | ||
| 156 | MortonCopy<false, PixelFormat::R16G16B16A16_UINT>, | ||
| 157 | MortonCopy<false, PixelFormat::B10G11R11_FLOAT>, | ||
| 158 | MortonCopy<false, PixelFormat::R32G32B32A32_UINT>, | ||
| 159 | MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>, | ||
| 160 | MortonCopy<false, PixelFormat::BC2_UNORM>, | ||
| 161 | MortonCopy<false, PixelFormat::BC3_UNORM>, | ||
| 162 | MortonCopy<false, PixelFormat::BC4_UNORM>, | ||
| 163 | MortonCopy<false, PixelFormat::BC4_SNORM>, | ||
| 164 | MortonCopy<false, PixelFormat::BC5_UNORM>, | ||
| 165 | MortonCopy<false, PixelFormat::BC5_SNORM>, | ||
| 166 | MortonCopy<false, PixelFormat::BC7_UNORM>, | ||
| 167 | MortonCopy<false, PixelFormat::BC6H_UFLOAT>, | ||
| 168 | MortonCopy<false, PixelFormat::BC6H_SFLOAT>, | ||
| 169 | // TODO(Subv): Swizzling ASTC formats are not supported | ||
| 170 | nullptr, | ||
| 171 | MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>, | ||
| 172 | MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>, | ||
| 173 | MortonCopy<false, PixelFormat::R32G32B32A32_SINT>, | ||
| 174 | MortonCopy<false, PixelFormat::R32G32_FLOAT>, | ||
| 175 | MortonCopy<false, PixelFormat::R32G32_SINT>, | ||
| 176 | MortonCopy<false, PixelFormat::R32_FLOAT>, | ||
| 177 | MortonCopy<false, PixelFormat::R16_FLOAT>, | ||
| 178 | MortonCopy<false, PixelFormat::R16_UNORM>, | ||
| 179 | MortonCopy<false, PixelFormat::R16_SNORM>, | ||
| 180 | MortonCopy<false, PixelFormat::R16_UINT>, | ||
| 181 | MortonCopy<false, PixelFormat::R16_SINT>, | ||
| 182 | MortonCopy<false, PixelFormat::R16G16_UNORM>, | ||
| 183 | MortonCopy<false, PixelFormat::R16G16_FLOAT>, | ||
| 184 | MortonCopy<false, PixelFormat::R16G16_UINT>, | ||
| 185 | MortonCopy<false, PixelFormat::R16G16_SINT>, | ||
| 186 | MortonCopy<false, PixelFormat::R16G16_SNORM>, | ||
| 187 | MortonCopy<false, PixelFormat::R32G32B32_FLOAT>, | ||
| 188 | MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>, | ||
| 189 | MortonCopy<false, PixelFormat::R8G8_UNORM>, | ||
| 190 | MortonCopy<false, PixelFormat::R8G8_SNORM>, | ||
| 191 | MortonCopy<false, PixelFormat::R8G8_SINT>, | ||
| 192 | MortonCopy<false, PixelFormat::R8G8_UINT>, | ||
| 193 | MortonCopy<false, PixelFormat::R32G32_UINT>, | ||
| 194 | MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>, | ||
| 195 | MortonCopy<false, PixelFormat::R32_UINT>, | ||
| 196 | MortonCopy<false, PixelFormat::R32_SINT>, | ||
| 197 | nullptr, | ||
| 198 | nullptr, | ||
| 199 | nullptr, | ||
| 200 | MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>, | ||
| 201 | MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>, | ||
| 202 | MortonCopy<false, PixelFormat::BC2_SRGB>, | ||
| 203 | MortonCopy<false, PixelFormat::BC3_SRGB>, | ||
| 204 | MortonCopy<false, PixelFormat::BC7_SRGB>, | ||
| 205 | MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>, | ||
| 206 | nullptr, | ||
| 207 | nullptr, | ||
| 208 | nullptr, | ||
| 209 | nullptr, | ||
| 210 | nullptr, | ||
| 211 | nullptr, | ||
| 212 | nullptr, | ||
| 213 | nullptr, | ||
| 214 | nullptr, | ||
| 215 | nullptr, | ||
| 216 | nullptr, | ||
| 217 | nullptr, | ||
| 218 | nullptr, | ||
| 219 | nullptr, | ||
| 220 | nullptr, | ||
| 221 | nullptr, | ||
| 222 | nullptr, | ||
| 223 | nullptr, | ||
| 224 | MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>, | ||
| 225 | MortonCopy<false, PixelFormat::D32_FLOAT>, | ||
| 226 | MortonCopy<false, PixelFormat::D16_UNORM>, | ||
| 227 | MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>, | ||
| 228 | MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>, | ||
| 229 | MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>, | ||
| 230 | }; | ||
| 231 | |||
| 232 | static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { | ||
| 233 | switch (mode) { | ||
| 234 | case MortonSwizzleMode::MortonToLinear: | ||
| 235 | return morton_to_linear_fns[static_cast<std::size_t>(format)]; | ||
| 236 | case MortonSwizzleMode::LinearToMorton: | ||
| 237 | return linear_to_morton_fns[static_cast<std::size_t>(format)]; | ||
| 238 | } | ||
| 239 | UNREACHABLE(); | ||
| 240 | return morton_to_linear_fns[static_cast<std::size_t>(format)]; | ||
| 241 | } | ||
| 242 | |||
| 243 | void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, | ||
| 244 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, | ||
| 245 | u8* buffer, u8* addr) { | ||
| 246 | GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, | ||
| 247 | tile_width_spacing, buffer, addr); | ||
| 248 | } | ||
| 249 | |||
| 250 | } // namespace VideoCore | ||
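The two tables above feed GetSwizzleFunction, which is a plain format-indexed dispatch: each PixelFormat maps to a function pointer, and nullptr marks formats with no Morton path. A minimal sketch of the same pattern, with illustrative names and a made-up format count rather than the real yuzu types:

```cpp
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

using SwizzleFn = void (*)(const std::uint8_t* src, std::uint8_t* dst);

void CopyR8(const std::uint8_t*, std::uint8_t*) {}
void CopyRGBA8(const std::uint8_t*, std::uint8_t*) {}

// One slot per pixel format; nullptr marks formats without an implementation.
constexpr std::array<SwizzleFn, 3> MORTON_TO_LINEAR{CopyR8, CopyRGBA8, nullptr};

SwizzleFn GetSwizzleFunction(std::size_t format_index) {
    const SwizzleFn fn = MORTON_TO_LINEAR[format_index];
    assert(fn != nullptr && "pixel format has no Morton swizzle path");
    return fn;
}
```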
diff --git a/src/video_core/morton.h b/src/video_core/morton.h index b714a7e3f..e69de29bb 100644 --- a/src/video_core/morton.h +++ b/src/video_core/morton.h | |||
| @@ -1,18 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/surface.h" | ||
| 9 | |||
| 10 | namespace VideoCore { | ||
| 11 | |||
| 12 | enum class MortonSwizzleMode { MortonToLinear, LinearToMorton }; | ||
| 13 | |||
| 14 | void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, | ||
| 15 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, | ||
| 16 | u8* buffer, u8* addr); | ||
| 17 | |||
| 18 | } // namespace VideoCore | ||
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 27ef4c69a..0cb0f387d 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -76,6 +76,9 @@ public: | |||
| 76 | /// Sync memory between guest and host. | 76 | /// Sync memory between guest and host. |
| 77 | virtual void SyncGuestHost() = 0; | 77 | virtual void SyncGuestHost() = 0; |
| 78 | 78 | ||
| 79 | /// Unmap memory range | ||
| 80 | virtual void UnmapMemory(VAddr addr, u64 size) = 0; | ||
| 81 | |||
| 79 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 82 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 80 | /// and invalidated | 83 | /// and invalidated |
| 81 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; | 84 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; |
| @@ -83,6 +86,12 @@ public: | |||
| 83 | /// Notify the host renderer to wait for previous primitive and compute operations. | 86 | /// Notify the host renderer to wait for previous primitive and compute operations. |
| 84 | virtual void WaitForIdle() = 0; | 87 | virtual void WaitForIdle() = 0; |
| 85 | 88 | ||
| 89 | /// Notify the host renderer to wait for reads and writes to render targets and flush caches. | ||
| 90 | virtual void FragmentBarrier() = 0; | ||
| 91 | |||
| 92 | /// Notify the host renderer to make available previous render target writes. | ||
| 93 | virtual void TiledCacheBarrier() = 0; | ||
| 94 | |||
| 86 | /// Notify the rasterizer to send all written commands to the host GPU. | 95 | /// Notify the rasterizer to send all written commands to the host GPU. |
| 87 | virtual void FlushCommands() = 0; | 96 | virtual void FlushCommands() = 0; |
| 88 | 97 | ||
| @@ -91,8 +100,7 @@ public: | |||
| 91 | 100 | ||
| 92 | /// Attempt to use a faster method to perform a surface copy | 101 | /// Attempt to use a faster method to perform a surface copy |
| 93 | [[nodiscard]] virtual bool AccelerateSurfaceCopy( | 102 | [[nodiscard]] virtual bool AccelerateSurfaceCopy( |
| 94 | const Tegra::Engines::Fermi2D::Regs::Surface& src, | 103 | const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst, |
| 95 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | ||
| 96 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 104 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 97 | return false; | 105 | return false; |
| 98 | } | 106 | } |
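The reworked AccelerateSurfaceCopy signature keeps the default-false hook pattern: backends that can copy on the GPU override it, everyone else inherits the fallback. A hedged sketch of the call-site contract; Surface and CopySurface are placeholders, not yuzu types:

```cpp
struct Surface {};

class RasterizerInterface {
public:
    virtual ~RasterizerInterface() = default;

    // Overriding backends return true once the copy has been queued on the GPU.
    [[nodiscard]] virtual bool AccelerateSurfaceCopy(const Surface&, const Surface&) {
        return false;
    }
};

void CopySurface(RasterizerInterface& rasterizer, const Surface& src, const Surface& dst) {
    if (rasterizer.AccelerateSurfaceCopy(src, dst)) {
        return; // handled by the accelerated path
    }
    // ... generic, non-accelerated copy ...
}
```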
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 60735d502..5772cad87 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -61,10 +61,9 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst | |||
| 61 | 61 | ||
| 62 | OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, | 62 | OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, |
| 63 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 63 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, |
| 64 | const Device& device_, std::size_t stream_size_) | 64 | const Device& device_, OGLStreamBuffer& stream_buffer_, |
| 65 | : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, | 65 | StateTracker& state_tracker) |
| 66 | std::make_unique<OGLStreamBuffer>(device_, stream_size_, true)}, | 66 | : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} { |
| 67 | device{device_} { | ||
| 68 | if (!device.HasFastBufferSubData()) { | 67 | if (!device.HasFastBufferSubData()) { |
| 69 | return; | 68 | return; |
| 70 | } | 69 | } |
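OGLBufferCache now borrows a stream buffer owned by its caller instead of constructing one from a size, which lets the rasterizer share a single buffer between users. A sketch of that ownership shift under stand-in type names:

```cpp
#include <memory>

struct StreamBuffer {};

// Before: each cache constructed and owned its own stream buffer.
class OwningCache {
public:
    OwningCache() : buffer{std::make_unique<StreamBuffer>()} {}

private:
    std::unique_ptr<StreamBuffer> buffer;
};

// After: the cache borrows a buffer whose lifetime the caller manages,
// so one buffer can serve several caches.
class BorrowingCache {
public:
    explicit BorrowingCache(StreamBuffer& buffer_) : buffer{buffer_} {}

private:
    StreamBuffer& buffer;
};
```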
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 95251e26b..17ee90316 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -22,6 +22,7 @@ namespace OpenGL { | |||
| 22 | class Device; | 22 | class Device; |
| 23 | class OGLStreamBuffer; | 23 | class OGLStreamBuffer; |
| 24 | class RasterizerOpenGL; | 24 | class RasterizerOpenGL; |
| 25 | class StateTracker; | ||
| 25 | 26 | ||
| 26 | class Buffer : public VideoCommon::BufferBlock { | 27 | class Buffer : public VideoCommon::BufferBlock { |
| 27 | public: | 28 | public: |
| @@ -52,9 +53,10 @@ private: | |||
| 52 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; | 53 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; |
| 53 | class OGLBufferCache final : public GenericBufferCache { | 54 | class OGLBufferCache final : public GenericBufferCache { |
| 54 | public: | 55 | public: |
| 55 | explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, | 56 | explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, |
| 56 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 57 | Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, |
| 57 | const Device& device_, std::size_t stream_size_); | 58 | const Device& device, OGLStreamBuffer& stream_buffer, |
| 59 | StateTracker& state_tracker); | ||
| 58 | ~OGLBufferCache(); | 60 | ~OGLBufferCache(); |
| 59 | 61 | ||
| 60 | BufferInfo GetEmptyBuffer(std::size_t) override; | 62 | BufferInfo GetEmptyBuffer(std::size_t) override; |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a94e4f72e..81b71edfb 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -5,9 +5,11 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <cstdlib> | ||
| 8 | #include <cstring> | 9 | #include <cstring> |
| 9 | #include <limits> | 10 | #include <limits> |
| 10 | #include <optional> | 11 | #include <optional> |
| 12 | #include <span> | ||
| 11 | #include <vector> | 13 | #include <vector> |
| 12 | 14 | ||
| 13 | #include <glad/glad.h> | 15 | #include <glad/glad.h> |
| @@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1; | |||
| 27 | 29 | ||
| 28 | constexpr u32 NumStages = 5; | 30 | constexpr u32 NumStages = 5; |
| 29 | 31 | ||
| 30 | constexpr std::array LimitUBOs = { | 32 | constexpr std::array LIMIT_UBOS = { |
| 31 | GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, | 33 | GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, |
| 32 | GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, | 34 | GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, |
| 33 | GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS}; | 35 | GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, |
| 34 | 36 | }; | |
| 35 | constexpr std::array LimitSSBOs = { | 37 | constexpr std::array LIMIT_SSBOS = { |
| 36 | GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, | 38 | GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, |
| 37 | GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, | 39 | GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, |
| 38 | GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS}; | 40 | GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, |
| 39 | 41 | }; | |
| 40 | constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, | 42 | constexpr std::array LIMIT_SAMPLERS = { |
| 41 | GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, | 43 | GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, |
| 42 | GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, | 44 | GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, |
| 43 | GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, | 45 | GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, |
| 44 | GL_MAX_TEXTURE_IMAGE_UNITS, | 46 | GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, |
| 45 | GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS}; | 47 | GL_MAX_TEXTURE_IMAGE_UNITS, |
| 46 | 48 | GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, | |
| 47 | constexpr std::array LimitImages = { | 49 | }; |
| 50 | constexpr std::array LIMIT_IMAGES = { | ||
| 48 | GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, | 51 | GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, |
| 49 | GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, | 52 | GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, |
| 50 | GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS}; | 53 | GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, |
| 54 | }; | ||
| 51 | 55 | ||
| 52 | template <typename T> | 56 | template <typename T> |
| 53 | T GetInteger(GLenum pname) { | 57 | T GetInteger(GLenum pname) { |
| @@ -76,8 +80,8 @@ std::vector<std::string_view> GetExtensions() { | |||
| 76 | return extensions; | 80 | return extensions; |
| 77 | } | 81 | } |
| 78 | 82 | ||
| 79 | bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) { | 83 | bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) { |
| 80 | return std::find(images.begin(), images.end(), extension) != images.end(); | 84 | return std::ranges::find(extensions, extension) != extensions.end(); |
| 81 | } | 85 | } |
| 82 | 86 | ||
| 83 | u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { | 87 | u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { |
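Taking std::span instead of a concrete std::vector reference makes HasExtension container-agnostic while staying copy-free. A small self-contained usage sketch (C++20):

```cpp
#include <algorithm>
#include <array>
#include <span>
#include <string_view>
#include <vector>

bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
    return std::ranges::find(extensions, extension) != extensions.end();
}

int main() {
    const std::vector<std::string_view> from_vector{"GL_EXT_debug_tool"};
    const std::array<std::string_view, 1> from_array{"GL_NV_viewport_array2"};
    // The same function accepts both containers through the span view.
    const bool ok = HasExtension(from_vector, "GL_EXT_debug_tool") &&
                    HasExtension(from_array, "GL_NV_viewport_array2");
    return ok ? 0 : 1;
}
```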
| @@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { | |||
| 91 | 95 | ||
| 92 | std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { | 96 | std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { |
| 93 | std::array<u32, Tegra::Engines::MaxShaderTypes> max; | 97 | std::array<u32, Tegra::Engines::MaxShaderTypes> max; |
| 94 | std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(), | 98 | std::ranges::transform(LIMIT_UBOS, max.begin(), |
| 95 | [](GLenum pname) { return GetInteger<u32>(pname); }); | 99 | [](GLenum pname) { return GetInteger<u32>(pname); }); |
| 96 | return max; | 100 | return max; |
| 97 | } | 101 | } |
| 98 | 102 | ||
| @@ -115,9 +119,10 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin | |||
| 115 | for (std::size_t i = 0; i < NumStages; ++i) { | 119 | for (std::size_t i = 0; i < NumStages; ++i) { |
| 116 | const std::size_t stage = stage_swizzle[i]; | 120 | const std::size_t stage = stage_swizzle[i]; |
| 117 | bindings[stage] = { | 121 | bindings[stage] = { |
| 118 | Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]), | 122 | Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), |
| 119 | Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]), | 123 | Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), |
| 120 | Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])}; | 124 | Extract(base_samplers, num_samplers, total_samplers / NumStages, |
| 125 | LIMIT_SAMPLERS[stage])}; | ||
| 121 | } | 126 | } |
| 122 | 127 | ||
| 123 | u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); | 128 | u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); |
| @@ -130,7 +135,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin | |||
| 130 | 135 | ||
| 131 | // Reserve at least 4 image bindings on the fragment stage. | 136 | // Reserve at least 4 image bindings on the fragment stage. |
| 132 | bindings[4].image = | 137 | bindings[4].image = |
| 133 | Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]); | 138 | Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); |
| 134 | 139 | ||
| 135 | // This is guaranteed to be at least 1. | 140 | // This is guaranteed to be at least 1. |
| 136 | const u32 total_extracted_images = num_images / (NumStages - 1); | 141 | const u32 total_extracted_images = num_images / (NumStages - 1); |
| @@ -142,7 +147,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin | |||
| 142 | continue; | 147 | continue; |
| 143 | } | 148 | } |
| 144 | bindings[stage].image = | 149 | bindings[stage].image = |
| 145 | Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); | 150 | Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); |
| 146 | } | 151 | } |
| 147 | 152 | ||
| 148 | // Compute doesn't care about any of this. | 153 | // Compute doesn't care about any of this. |
| @@ -188,6 +193,11 @@ bool IsASTCSupported() { | |||
| 188 | return true; | 193 | return true; |
| 189 | } | 194 | } |
| 190 | 195 | ||
| 196 | [[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) { | ||
| 197 | const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | ||
| 198 | return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); | ||
| 199 | } | ||
| 200 | |||
| 191 | } // Anonymous namespace | 201 | } // Anonymous namespace |
| 192 | 202 | ||
| 193 | Device::Device() | 203 | Device::Device() |
| @@ -198,6 +208,7 @@ Device::Device() | |||
| 198 | 208 | ||
| 199 | const bool is_nvidia = vendor == "NVIDIA Corporation"; | 209 | const bool is_nvidia = vendor == "NVIDIA Corporation"; |
| 200 | const bool is_amd = vendor == "ATI Technologies Inc."; | 210 | const bool is_amd = vendor == "ATI Technologies Inc."; |
| 211 | const bool is_intel = vendor == "Intel"; | ||
| 201 | 212 | ||
| 202 | bool disable_fast_buffer_sub_data = false; | 213 | bool disable_fast_buffer_sub_data = false; |
| 203 | if (is_nvidia && version == "4.6.0 NVIDIA 443.24") { | 214 | if (is_nvidia && version == "4.6.0 NVIDIA 443.24") { |
| @@ -206,9 +217,8 @@ Device::Device() | |||
| 206 | "Beta driver 443.24 is known to have issues. There might be performance issues."); | 217 | "Beta driver 443.24 is known to have issues. There might be performance issues."); |
| 207 | disable_fast_buffer_sub_data = true; | 218 | disable_fast_buffer_sub_data = true; |
| 208 | } | 219 | } |
| 209 | 220 | uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | |
| 210 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 221 | shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| 211 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | ||
| 212 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 222 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| 213 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | 223 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |
| 214 | max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); | 224 | max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); |
| @@ -222,8 +232,10 @@ Device::Device() | |||
| 222 | has_variable_aoffi = TestVariableAoffi(); | 232 | has_variable_aoffi = TestVariableAoffi(); |
| 223 | has_component_indexing_bug = is_amd; | 233 | has_component_indexing_bug = is_amd; |
| 224 | has_precise_bug = TestPreciseBug(); | 234 | has_precise_bug = TestPreciseBug(); |
| 235 | has_broken_texture_view_formats = is_amd || is_intel; | ||
| 225 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; | 236 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; |
| 226 | has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; | 237 | has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; |
| 238 | has_debugging_tool_attached = IsDebugToolAttached(extensions); | ||
| 227 | 239 | ||
| 228 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive | 240 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive |
| 229 | // uniform buffers as "push constants" | 241 | // uniform buffers as "push constants" |
| @@ -238,6 +250,8 @@ Device::Device() | |||
| 238 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); | 250 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); |
| 239 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); | 251 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); |
| 240 | LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); | 252 | LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); |
| 253 | LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", | ||
| 254 | has_broken_texture_view_formats); | ||
| 241 | 255 | ||
| 242 | if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) { | 256 | if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) { |
| 243 | LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); | 257 | LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); |
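The context lines above show the GetInteger&lt;T&gt; helper that the alignment and limit queries go through; this is a paraphrased sketch of how such a wrapper over glGetIntegerv looks, not a copy of the real one:

```cpp
#include <glad/glad.h>

#include <cstddef>

template <typename T>
T GetInteger(GLenum pname) {
    GLint value = 0;
    glGetIntegerv(pname, &value);
    return static_cast<T>(value); // callers pick the width they need (u32, size_t, ...)
}

// Example (requires a current GL context):
// const auto alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
```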
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8a4b6b9fc..3e79d1e37 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -36,11 +36,11 @@ public: | |||
| 36 | return GetBaseBindings(static_cast<std::size_t>(shader_type)); | 36 | return GetBaseBindings(static_cast<std::size_t>(shader_type)); |
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | std::size_t GetUniformBufferAlignment() const { | 39 | size_t GetUniformBufferAlignment() const { |
| 40 | return uniform_buffer_alignment; | 40 | return uniform_buffer_alignment; |
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | std::size_t GetShaderStorageBufferAlignment() const { | 43 | size_t GetShaderStorageBufferAlignment() const { |
| 44 | return shader_storage_alignment; | 44 | return shader_storage_alignment; |
| 45 | } | 45 | } |
| 46 | 46 | ||
| @@ -96,6 +96,10 @@ public: | |||
| 96 | return has_precise_bug; | 96 | return has_precise_bug; |
| 97 | } | 97 | } |
| 98 | 98 | ||
| 99 | bool HasBrokenTextureViewFormats() const { | ||
| 100 | return has_broken_texture_view_formats; | ||
| 101 | } | ||
| 102 | |||
| 99 | bool HasFastBufferSubData() const { | 103 | bool HasFastBufferSubData() const { |
| 100 | return has_fast_buffer_sub_data; | 104 | return has_fast_buffer_sub_data; |
| 101 | } | 105 | } |
| @@ -104,6 +108,10 @@ public: | |||
| 104 | return has_nv_viewport_array2; | 108 | return has_nv_viewport_array2; |
| 105 | } | 109 | } |
| 106 | 110 | ||
| 111 | bool HasDebuggingToolAttached() const { | ||
| 112 | return has_debugging_tool_attached; | ||
| 113 | } | ||
| 114 | |||
| 107 | bool UseAssemblyShaders() const { | 115 | bool UseAssemblyShaders() const { |
| 108 | return use_assembly_shaders; | 116 | return use_assembly_shaders; |
| 109 | } | 117 | } |
| @@ -118,8 +126,8 @@ private: | |||
| 118 | 126 | ||
| 119 | std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; | 127 | std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; |
| 120 | std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; | 128 | std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; |
| 121 | std::size_t uniform_buffer_alignment{}; | 129 | size_t uniform_buffer_alignment{}; |
| 122 | std::size_t shader_storage_alignment{}; | 130 | size_t shader_storage_alignment{}; |
| 123 | u32 max_vertex_attributes{}; | 131 | u32 max_vertex_attributes{}; |
| 124 | u32 max_varyings{}; | 132 | u32 max_varyings{}; |
| 125 | u32 max_compute_shared_memory_size{}; | 133 | u32 max_compute_shared_memory_size{}; |
| @@ -133,8 +141,10 @@ private: | |||
| 133 | bool has_variable_aoffi{}; | 141 | bool has_variable_aoffi{}; |
| 134 | bool has_component_indexing_bug{}; | 142 | bool has_component_indexing_bug{}; |
| 135 | bool has_precise_bug{}; | 143 | bool has_precise_bug{}; |
| 144 | bool has_broken_texture_view_formats{}; | ||
| 136 | bool has_fast_buffer_sub_data{}; | 145 | bool has_fast_buffer_sub_data{}; |
| 137 | bool has_nv_viewport_array2{}; | 146 | bool has_nv_viewport_array2{}; |
| 147 | bool has_debugging_tool_attached{}; | ||
| 138 | bool use_assembly_shaders{}; | 148 | bool use_assembly_shaders{}; |
| 139 | bool use_asynchronous_shaders{}; | 149 | bool use_asynchronous_shaders{}; |
| 140 | }; | 150 | }; |
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 6040646cb..3e9c922f5 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp | |||
| @@ -46,7 +46,7 @@ void GLInnerFence::Wait() { | |||
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, | 48 | FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, |
| 49 | Tegra::GPU& gpu_, TextureCacheOpenGL& texture_cache_, | 49 | Tegra::GPU& gpu_, TextureCache& texture_cache_, |
| 50 | OGLBufferCache& buffer_cache_, QueryCache& query_cache_) | 50 | OGLBufferCache& buffer_cache_, QueryCache& query_cache_) |
| 51 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} | 51 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} |
| 52 | 52 | ||
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index 39ca6125b..30dbee613 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h | |||
| @@ -33,12 +33,12 @@ private: | |||
| 33 | 33 | ||
| 34 | using Fence = std::shared_ptr<GLInnerFence>; | 34 | using Fence = std::shared_ptr<GLInnerFence>; |
| 35 | using GenericFenceManager = | 35 | using GenericFenceManager = |
| 36 | VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>; | 36 | VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>; |
| 37 | 37 | ||
| 38 | class FenceManagerOpenGL final : public GenericFenceManager { | 38 | class FenceManagerOpenGL final : public GenericFenceManager { |
| 39 | public: | 39 | public: |
| 40 | explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 40 | explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, |
| 41 | TextureCacheOpenGL& texture_cache_, OGLBufferCache& buffer_cache_, | 41 | TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, |
| 42 | QueryCache& query_cache_); | 42 | QueryCache& query_cache_); |
| 43 | 43 | ||
| 44 | protected: | 44 | protected: |
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp deleted file mode 100644 index b8a512cb6..000000000 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp +++ /dev/null | |||
| @@ -1,85 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | #include <unordered_map> | ||
| 7 | #include <utility> | ||
| 8 | |||
| 9 | #include <glad/glad.h> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | ||
| 14 | |||
| 15 | namespace OpenGL { | ||
| 16 | |||
| 17 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 18 | using VideoCore::Surface::SurfaceType; | ||
| 19 | |||
| 20 | FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default; | ||
| 21 | |||
| 22 | FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default; | ||
| 23 | |||
| 24 | GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) { | ||
| 25 | const auto [entry, is_cache_miss] = cache.try_emplace(key); | ||
| 26 | auto& framebuffer{entry->second}; | ||
| 27 | if (is_cache_miss) { | ||
| 28 | framebuffer = CreateFramebuffer(key); | ||
| 29 | } | ||
| 30 | return framebuffer.handle; | ||
| 31 | } | ||
| 32 | |||
| 33 | OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) { | ||
| 34 | OGLFramebuffer framebuffer; | ||
| 35 | framebuffer.Create(); | ||
| 36 | |||
| 37 | // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. | ||
| 38 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); | ||
| 39 | |||
| 40 | if (key.zeta) { | ||
| 41 | const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil; | ||
| 42 | const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT; | ||
| 43 | key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER); | ||
| 44 | } | ||
| 45 | |||
| 46 | std::size_t num_buffers = 0; | ||
| 47 | std::array<GLenum, Maxwell::NumRenderTargets> targets; | ||
| 48 | |||
| 49 | for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | ||
| 50 | if (!key.colors[index]) { | ||
| 51 | targets[index] = GL_NONE; | ||
| 52 | continue; | ||
| 53 | } | ||
| 54 | const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index); | ||
| 55 | key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER); | ||
| 56 | |||
| 57 | const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111; | ||
| 58 | targets[index] = GL_COLOR_ATTACHMENT0 + attachment; | ||
| 59 | num_buffers = index + 1; | ||
| 60 | } | ||
| 61 | |||
| 62 | if (num_buffers > 0) { | ||
| 63 | glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets)); | ||
| 64 | } else { | ||
| 65 | glDrawBuffer(GL_NONE); | ||
| 66 | } | ||
| 67 | |||
| 68 | return framebuffer; | ||
| 69 | } | ||
| 70 | |||
| 71 | std::size_t FramebufferCacheKey::Hash() const noexcept { | ||
| 72 | std::size_t hash = std::hash<View>{}(zeta); | ||
| 73 | for (const auto& color : colors) { | ||
| 74 | hash ^= std::hash<View>{}(color); | ||
| 75 | } | ||
| 76 | hash ^= static_cast<std::size_t>(color_attachments) << 16; | ||
| 77 | return hash; | ||
| 78 | } | ||
| 79 | |||
| 80 | bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept { | ||
| 81 | return std::tie(colors, zeta, color_attachments) == | ||
| 82 | std::tie(rhs.colors, rhs.zeta, rhs.color_attachments); | ||
| 83 | } | ||
| 84 | |||
| 85 | } // namespace OpenGL | ||
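GetFramebuffer in the deleted file is a create-on-miss cache built on try_emplace, which inserts a default value and reports whether the key was new. The idiom in isolation, with generic stand-in names:

```cpp
#include <string>
#include <unordered_map>

struct Resource {
    int handle = 0;
};

Resource CreateResource(const std::string& key) {
    return Resource{static_cast<int>(key.size())}; // stand-in for real creation
}

Resource& GetOrCreate(std::unordered_map<std::string, Resource>& cache, const std::string& key) {
    const auto [entry, is_cache_miss] = cache.try_emplace(key);
    if (is_cache_miss) {
        entry->second = CreateResource(key); // only built the first time the key is seen
    }
    return entry->second;
}
```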
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h deleted file mode 100644 index 8f698fee0..000000000 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h +++ /dev/null | |||
| @@ -1,68 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <unordered_map> | ||
| 10 | |||
| 11 | #include <glad/glad.h> | ||
| 12 | |||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 17 | |||
| 18 | namespace OpenGL { | ||
| 19 | |||
| 20 | constexpr std::size_t BitsPerAttachment = 4; | ||
| 21 | |||
| 22 | struct FramebufferCacheKey { | ||
| 23 | View zeta; | ||
| 24 | std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors; | ||
| 25 | u32 color_attachments = 0; | ||
| 26 | |||
| 27 | std::size_t Hash() const noexcept; | ||
| 28 | |||
| 29 | bool operator==(const FramebufferCacheKey& rhs) const noexcept; | ||
| 30 | |||
| 31 | bool operator!=(const FramebufferCacheKey& rhs) const noexcept { | ||
| 32 | return !operator==(rhs); | ||
| 33 | } | ||
| 34 | |||
| 35 | void SetAttachment(std::size_t index, u32 attachment) { | ||
| 36 | color_attachments |= attachment << (BitsPerAttachment * index); | ||
| 37 | } | ||
| 38 | }; | ||
| 39 | |||
| 40 | } // namespace OpenGL | ||
| 41 | |||
| 42 | namespace std { | ||
| 43 | |||
| 44 | template <> | ||
| 45 | struct hash<OpenGL::FramebufferCacheKey> { | ||
| 46 | std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept { | ||
| 47 | return k.Hash(); | ||
| 48 | } | ||
| 49 | }; | ||
| 50 | |||
| 51 | } // namespace std | ||
| 52 | |||
| 53 | namespace OpenGL { | ||
| 54 | |||
| 55 | class FramebufferCacheOpenGL { | ||
| 56 | public: | ||
| 57 | FramebufferCacheOpenGL(); | ||
| 58 | ~FramebufferCacheOpenGL(); | ||
| 59 | |||
| 60 | GLuint GetFramebuffer(const FramebufferCacheKey& key); | ||
| 61 | |||
| 62 | private: | ||
| 63 | OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); | ||
| 64 | |||
| 65 | std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache; | ||
| 66 | }; | ||
| 67 | |||
| 68 | } // namespace OpenGL | ||
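The deleted header also shows the usual recipe for keying an unordered_map on an aggregate: a member Hash(), equality, and a std::hash specialization forwarding to the member. Reduced to a skeleton with an illustrative Key type:

```cpp
#include <cstddef>
#include <functional>
#include <unordered_map>

struct Key {
    int first = 0;
    int second = 0;

    std::size_t Hash() const noexcept {
        return std::hash<int>{}(first) ^ (std::hash<int>{}(second) << 16);
    }

    bool operator==(const Key& rhs) const noexcept {
        return first == rhs.first && second == rhs.second;
    }
};

namespace std {

template <>
struct hash<Key> {
    std::size_t operator()(const Key& k) const noexcept {
        return k.Hash();
    }
};

} // namespace std

// std::unordered_map<Key, int> cache; // now compiles without a custom hasher argument
```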
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e58e84759..8aa63d329 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -25,12 +25,15 @@ | |||
| 25 | #include "video_core/engines/maxwell_3d.h" | 25 | #include "video_core/engines/maxwell_3d.h" |
| 26 | #include "video_core/engines/shader_type.h" | 26 | #include "video_core/engines/shader_type.h" |
| 27 | #include "video_core/memory_manager.h" | 27 | #include "video_core/memory_manager.h" |
| 28 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 28 | #include "video_core/renderer_opengl/gl_query_cache.h" | 29 | #include "video_core/renderer_opengl/gl_query_cache.h" |
| 29 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 30 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 31 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 32 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 31 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | 33 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 32 | #include "video_core/renderer_opengl/renderer_opengl.h" | 34 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 33 | #include "video_core/shader_cache.h" | 35 | #include "video_core/shader_cache.h" |
| 36 | #include "video_core/texture_cache/texture_cache.h" | ||
| 34 | 37 | ||
| 35 | namespace OpenGL { | 38 | namespace OpenGL { |
| 36 | 39 | ||
| @@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255 | |||
| 55 | 58 | ||
| 56 | namespace { | 59 | namespace { |
| 57 | 60 | ||
| 58 | constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18; | 61 | constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18; |
| 59 | constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = | 62 | constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = |
| 60 | NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; | 63 | NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; |
| 61 | constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = | 64 | constexpr size_t TOTAL_CONST_BUFFER_BYTES = |
| 62 | NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; | 65 | NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; |
| 63 | 66 | ||
| 64 | constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; | 67 | constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; |
| 65 | constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; | 68 | constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; |
| 69 | |||
| 70 | constexpr size_t MAX_TEXTURES = 192; | ||
| 71 | constexpr size_t MAX_IMAGES = 48; | ||
| 72 | |||
| 73 | struct TextureHandle { | ||
| 74 | constexpr TextureHandle(u32 data, bool via_header_index) { | ||
| 75 | const Tegra::Texture::TextureHandle handle{data}; | ||
| 76 | image = handle.tic_id; | ||
| 77 | sampler = via_header_index ? image : handle.tsc_id.Value(); | ||
| 78 | } | ||
| 79 | |||
| 80 | u32 image; | ||
| 81 | u32 sampler; | ||
| 82 | }; | ||
| 66 | 83 | ||
| 67 | template <typename Engine, typename Entry> | 84 | template <typename Engine, typename Entry> |
| 68 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | 85 | TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, |
| 69 | ShaderType shader_type, std::size_t index = 0) { | 86 | ShaderType shader_type, size_t index = 0) { |
| 70 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | 87 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { |
| 71 | if (entry.is_separated) { | 88 | if (entry.is_separated) { |
| 72 | const u32 buffer_1 = entry.buffer; | 89 | const u32 buffer_1 = entry.buffer; |
| @@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry | |||
| 75 | const u32 offset_2 = entry.secondary_offset; | 92 | const u32 offset_2 = entry.secondary_offset; |
| 76 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); | 93 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); |
| 77 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); | 94 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); |
| 78 | return engine.GetTextureInfo(handle_1 | handle_2); | 95 | return TextureHandle(handle_1 | handle_2, via_header_index); |
| 79 | } | 96 | } |
| 80 | } | 97 | } |
| 81 | if (entry.is_bindless) { | 98 | if (entry.is_bindless) { |
| 82 | const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); | 99 | const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); |
| 83 | return engine.GetTextureInfo(handle); | 100 | return TextureHandle(raw, via_header_index); |
| 84 | } | ||
| 85 | |||
| 86 | const auto& gpu_profile = engine.AccessGuestDriverProfile(); | ||
| 87 | const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); | ||
| 88 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { | ||
| 89 | return engine.GetStageTexture(shader_type, offset); | ||
| 90 | } else { | ||
| 91 | return engine.GetTexture(offset); | ||
| 92 | } | 101 | } |
| 102 | const u32 buffer = engine.GetBoundBuffer(); | ||
| 103 | const u64 offset = (entry.offset + index) * sizeof(u32); | ||
| 104 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | ||
| 93 | } | 105 | } |
| 94 | 106 | ||
| 95 | std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | 107 | std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, |
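TextureHandle in the hunks above splits one 32-bit word into a texture (TIC) index and a sampler (TSC) index, reusing the image index for the sampler when handles arrive via the header index. A shift-and-mask sketch; the 20/12 bit split is the conventional Maxwell layout and should be read as an assumption, since the real bitfield widths are not shown in this diff:

```cpp
#include <cstdint>

constexpr std::uint32_t TicId(std::uint32_t raw) {
    return raw & 0xFFFFFu; // assumed: low 20 bits hold the texture descriptor index
}

constexpr std::uint32_t TscId(std::uint32_t raw) {
    return (raw >> 20) & 0xFFFu; // assumed: high 12 bits hold the sampler descriptor index
}

struct TextureHandle {
    constexpr TextureHandle(std::uint32_t data, bool via_header_index)
        : image{TicId(data)}, sampler{via_header_index ? image : TscId(data)} {}

    std::uint32_t image;
    std::uint32_t sampler;
};

static_assert(TextureHandle{0x00300005u, false}.image == 5);
static_assert(TextureHandle{0x00300005u, false}.sampler == 3);
```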
| @@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | |||
| 97 | if (!entry.IsIndirect()) { | 109 | if (!entry.IsIndirect()) { |
| 98 | return entry.GetSize(); | 110 | return entry.GetSize(); |
| 99 | } | 111 | } |
| 100 | |||
| 101 | if (buffer.size > Maxwell::MaxConstBufferSize) { | 112 | if (buffer.size > Maxwell::MaxConstBufferSize) { |
| 102 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, | 113 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, |
| 103 | Maxwell::MaxConstBufferSize); | 114 | Maxwell::MaxConstBufferSize); |
| @@ -147,23 +158,60 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss | |||
| 147 | reinterpret_cast<const GLuint*>(ssbos)); | 158 | reinterpret_cast<const GLuint*>(ssbos)); |
| 148 | } | 159 | } |
| 149 | 160 | ||
| 161 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | ||
| 162 | if (entry.is_buffer) { | ||
| 163 | return ImageViewType::Buffer; | ||
| 164 | } | ||
| 165 | switch (entry.type) { | ||
| 166 | case Tegra::Shader::TextureType::Texture1D: | ||
| 167 | return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; | ||
| 168 | case Tegra::Shader::TextureType::Texture2D: | ||
| 169 | return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; | ||
| 170 | case Tegra::Shader::TextureType::Texture3D: | ||
| 171 | return ImageViewType::e3D; | ||
| 172 | case Tegra::Shader::TextureType::TextureCube: | ||
| 173 | return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; | ||
| 174 | } | ||
| 175 | UNREACHABLE(); | ||
| 176 | return ImageViewType::e2D; | ||
| 177 | } | ||
| 178 | |||
| 179 | ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { | ||
| 180 | switch (entry.type) { | ||
| 181 | case Tegra::Shader::ImageType::Texture1D: | ||
| 182 | return ImageViewType::e1D; | ||
| 183 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 184 | return ImageViewType::e1DArray; | ||
| 185 | case Tegra::Shader::ImageType::Texture2D: | ||
| 186 | return ImageViewType::e2D; | ||
| 187 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 188 | return ImageViewType::e2DArray; | ||
| 189 | case Tegra::Shader::ImageType::Texture3D: | ||
| 190 | return ImageViewType::e3D; | ||
| 191 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 192 | return ImageViewType::Buffer; | ||
| 193 | } | ||
| 194 | UNREACHABLE(); | ||
| 195 | return ImageViewType::e2D; | ||
| 196 | } | ||
| 197 | |||
| 150 | } // Anonymous namespace | 198 | } // Anonymous namespace |
| 151 | 199 | ||
| 152 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 200 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 153 | Core::Memory::Memory& cpu_memory_, const Device& device_, | 201 | Core::Memory::Memory& cpu_memory_, const Device& device_, |
| 154 | ScreenInfo& screen_info_, ProgramManager& program_manager_, | 202 | ScreenInfo& screen_info_, ProgramManager& program_manager_, |
| 155 | StateTracker& state_tracker_) | 203 | StateTracker& state_tracker_) |
| 156 | : RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()), | 204 | : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()), |
| 157 | kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), | 205 | kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), |
| 158 | screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), | 206 | screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), |
| 159 | texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker), | 207 | stream_buffer(device, state_tracker), |
| 208 | texture_cache_runtime(device, program_manager, state_tracker), | ||
| 209 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | ||
| 160 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), | 210 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), |
| 161 | query_cache(*this, maxwell3d, gpu_memory), | 211 | query_cache(*this, maxwell3d, gpu_memory), |
| 162 | buffer_cache(*this, gpu_memory, cpu_memory_, device, STREAM_BUFFER_SIZE), | 212 | buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker), |
| 163 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | 213 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), |
| 164 | async_shaders(emu_window_) { | 214 | async_shaders(emu_window_) { |
| 165 | CheckExtensions(); | ||
| 166 | |||
| 167 | unified_uniform_buffer.Create(); | 215 | unified_uniform_buffer.Create(); |
| 168 | glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); | 216 | glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); |
| 169 | 217 | ||
| @@ -174,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 174 | nullptr, 0); | 222 | nullptr, 0); |
| 175 | } | 223 | } |
| 176 | } | 224 | } |
| 177 | |||
| 178 | if (device.UseAsynchronousShaders()) { | 225 | if (device.UseAsynchronousShaders()) { |
| 179 | async_shaders.AllocateWorkers(); | 226 | async_shaders.AllocateWorkers(); |
| 180 | } | 227 | } |
| @@ -186,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() { | |||
| 186 | } | 233 | } |
| 187 | } | 234 | } |
| 188 | 235 | ||
| 189 | void RasterizerOpenGL::CheckExtensions() { | ||
| 190 | if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { | ||
| 191 | LOG_WARNING( | ||
| 192 | Render_OpenGL, | ||
| 193 | "Anisotropic filter is not supported! This can cause graphical issues in some games."); | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
| 197 | void RasterizerOpenGL::SetupVertexFormat() { | 236 | void RasterizerOpenGL::SetupVertexFormat() { |
| 198 | auto& flags = maxwell3d.dirty.flags; | 237 | auto& flags = maxwell3d.dirty.flags; |
| 199 | if (!flags[Dirty::VertexFormats]) { | 238 | if (!flags[Dirty::VertexFormats]) { |
| @@ -316,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { | |||
| 316 | return info.offset; | 355 | return info.offset; |
| 317 | } | 356 | } |
| 318 | 357 | ||
| 319 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | 358 | void RasterizerOpenGL::SetupShaders() { |
| 320 | MICROPROFILE_SCOPE(OpenGL_Shader); | 359 | MICROPROFILE_SCOPE(OpenGL_Shader); |
| 321 | u32 clip_distances = 0; | 360 | u32 clip_distances = 0; |
| 322 | 361 | ||
| 362 | std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; | ||
| 363 | image_view_indices.clear(); | ||
| 364 | sampler_handles.clear(); | ||
| 365 | |||
| 366 | texture_cache.SynchronizeGraphicsDescriptors(); | ||
| 367 | |||
| 323 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 368 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 324 | const auto& shader_config = maxwell3d.regs.shader_config[index]; | 369 | const auto& shader_config = maxwell3d.regs.shader_config[index]; |
| 325 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | 370 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; |
| @@ -338,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 338 | } | 383 | } |
| 339 | continue; | 384 | continue; |
| 340 | } | 385 | } |
| 341 | |||
| 342 | // Currently these stages are not supported in the OpenGL backend. | 386 | // Currently these stages are not supported in the OpenGL backend. |
| 343 | // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL | 387 | // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL |
| 344 | if (program == Maxwell::ShaderProgram::TesselationControl || | 388 | if (program == Maxwell::ShaderProgram::TesselationControl || |
| @@ -347,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 347 | } | 391 | } |
| 348 | 392 | ||
| 349 | Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); | 393 | Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); |
| 350 | |||
| 351 | const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; | 394 | const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; |
| 352 | switch (program) { | 395 | switch (program) { |
| 353 | case Maxwell::ShaderProgram::VertexA: | 396 | case Maxwell::ShaderProgram::VertexA: |
| @@ -363,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 363 | default: | 406 | default: |
| 364 | UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, | 407 | UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, |
| 365 | shader_config.enable.Value(), shader_config.offset); | 408 | shader_config.enable.Value(), shader_config.offset); |
| 409 | break; | ||
| 366 | } | 410 | } |
| 367 | 411 | ||
| 368 | // Stage indices are 0 - 5 | 412 | // Stage indices are 0 - 5 |
| 369 | const std::size_t stage = index == 0 ? 0 : index - 1; | 413 | const size_t stage = index == 0 ? 0 : index - 1; |
| 414 | shaders[stage] = shader; | ||
| 415 | |||
| 370 | SetupDrawConstBuffers(stage, shader); | 416 | SetupDrawConstBuffers(stage, shader); |
| 371 | SetupDrawGlobalMemory(stage, shader); | 417 | SetupDrawGlobalMemory(stage, shader); |
| 372 | SetupDrawTextures(stage, shader); | 418 | SetupDrawTextures(shader, stage); |
| 373 | SetupDrawImages(stage, shader); | 419 | SetupDrawImages(shader, stage); |
| 374 | 420 | ||
| 375 | // Workaround for Intel drivers. | 421 | // Workaround for Intel drivers. |
| 376 | // When a clip distance is enabled but not set in the shader it crops parts of the screen | 422 | // When a clip distance is enabled but not set in the shader it crops parts of the screen |
| @@ -384,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 384 | ++index; | 430 | ++index; |
| 385 | } | 431 | } |
| 386 | } | 432 | } |
| 387 | |||
| 388 | SyncClipEnabled(clip_distances); | 433 | SyncClipEnabled(clip_distances); |
| 389 | maxwell3d.dirty.flags[Dirty::Shaders] = false; | 434 | maxwell3d.dirty.flags[Dirty::Shaders] = false; |
| 435 | |||
| 436 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 437 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 438 | |||
| 439 | size_t image_view_index = 0; | ||
| 440 | size_t texture_index = 0; | ||
| 441 | size_t image_index = 0; | ||
| 442 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 443 | const Shader* const shader = shaders[stage]; | ||
| 444 | if (shader) { | ||
| 445 | const auto base = device.GetBaseBindings(stage); | ||
| 446 | BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, | ||
| 447 | texture_index, image_index); | ||
| 448 | } | ||
| 449 | } | ||
| 390 | } | 450 | } |
| 391 | 451 | ||
| 392 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | 452 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { |
| @@ -417,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s | |||
| 417 | shader_cache.LoadDiskCache(title_id, stop_loading, callback); | 477 | shader_cache.LoadDiskCache(title_id, stop_loading, callback); |
| 418 | } | 478 | } |
| 419 | 479 | ||
| 420 | void RasterizerOpenGL::ConfigureFramebuffers() { | ||
| 421 | MICROPROFILE_SCOPE(OpenGL_Framebuffer); | ||
| 422 | if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) { | ||
| 423 | return; | ||
| 424 | } | ||
| 425 | maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; | ||
| 426 | |||
| 427 | texture_cache.GuardRenderTargets(true); | ||
| 428 | |||
| 429 | View depth_surface = texture_cache.GetDepthBufferSurface(true); | ||
| 430 | |||
| 431 | const auto& regs = maxwell3d.regs; | ||
| 432 | UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); | ||
| 433 | |||
| 434 | // Bind the framebuffer surfaces | ||
| 435 | FramebufferCacheKey key; | ||
| 436 | const auto colors_count = static_cast<std::size_t>(regs.rt_control.count); | ||
| 437 | for (std::size_t index = 0; index < colors_count; ++index) { | ||
| 438 | View color_surface{texture_cache.GetColorBufferSurface(index, true)}; | ||
| 439 | if (!color_surface) { | ||
| 440 | continue; | ||
| 441 | } | ||
| 442 | // Assume that a surface will be written to if it is used as a framebuffer, even | ||
| 443 | // if the shader doesn't actually write to it. | ||
| 444 | texture_cache.MarkColorBufferInUse(index); | ||
| 445 | |||
| 446 | key.SetAttachment(index, regs.rt_control.GetMap(index)); | ||
| 447 | key.colors[index] = std::move(color_surface); | ||
| 448 | } | ||
| 449 | |||
| 450 | if (depth_surface) { | ||
| 451 | // Assume that a surface will be written to if it is used as a framebuffer, even if | ||
| 452 | // the shader doesn't actually write to it. | ||
| 453 | texture_cache.MarkDepthBufferInUse(); | ||
| 454 | key.zeta = std::move(depth_surface); | ||
| 455 | } | ||
| 456 | |||
| 457 | texture_cache.GuardRenderTargets(false); | ||
| 458 | |||
| 459 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); | ||
| 460 | } | ||
| 461 | |||
| 462 | void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) { | ||
| 463 | const auto& regs = maxwell3d.regs; | ||
| 464 | |||
| 465 | texture_cache.GuardRenderTargets(true); | ||
| 466 | View color_surface; | ||
| 467 | |||
| 468 | if (using_color) { | ||
| 469 | // Determine if we have to preserve the contents. | ||
| 470 | // First we have to make sure all clear masks are enabled. | ||
| 471 | bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G || | ||
| 472 | !regs.clear_buffers.B || !regs.clear_buffers.A; | ||
| 473 | const std::size_t index = regs.clear_buffers.RT; | ||
| 474 | if (regs.clear_flags.scissor) { | ||
| 475 | // Then we have to confirm scissor testing clears the whole image. | ||
| 476 | const auto& scissor = regs.scissor_test[0]; | ||
| 477 | preserve_contents |= scissor.min_x > 0; | ||
| 478 | preserve_contents |= scissor.min_y > 0; | ||
| 479 | preserve_contents |= scissor.max_x < regs.rt[index].width; | ||
| 480 | preserve_contents |= scissor.max_y < regs.rt[index].height; | ||
| 481 | } | ||
| 482 | |||
| 483 | color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents); | ||
| 484 | texture_cache.MarkColorBufferInUse(index); | ||
| 485 | } | ||
| 486 | |||
| 487 | View depth_surface; | ||
| 488 | if (using_depth_stencil) { | ||
| 489 | bool preserve_contents = false; | ||
| 490 | if (regs.clear_flags.scissor) { | ||
| 491 | // For depth stencil clears we only have to confirm the scissor test covers the whole image. | ||
| 492 | const auto& scissor = regs.scissor_test[0]; | ||
| 493 | preserve_contents |= scissor.min_x > 0; | ||
| 494 | preserve_contents |= scissor.min_y > 0; | ||
| 495 | preserve_contents |= scissor.max_x < regs.zeta_width; | ||
| 496 | preserve_contents |= scissor.max_y < regs.zeta_height; | ||
| 497 | } | ||
| 498 | |||
| 499 | depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); | ||
| 500 | texture_cache.MarkDepthBufferInUse(); | ||
| 501 | } | ||
| 502 | texture_cache.GuardRenderTargets(false); | ||
| 503 | |||
| 504 | FramebufferCacheKey key; | ||
| 505 | key.colors[0] = std::move(color_surface); | ||
| 506 | key.zeta = std::move(depth_surface); | ||
| 507 | |||
| 508 | state_tracker.NotifyFramebuffer(); | ||
| 509 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); | ||
| 510 | } | ||
| 511 | |||
| 512 | void RasterizerOpenGL::Clear() { | 480 | void RasterizerOpenGL::Clear() { |
| 513 | if (!maxwell3d.ShouldExecute()) { | 481 | if (!maxwell3d.ShouldExecute()) { |
| 514 | return; | 482 | return; |
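The deleted ConfigureClearFramebuffer logic boils down to one question: may this clear discard the previous contents? Only when every channel is written and any scissor box spans the whole target. The test condensed into a standalone predicate with illustrative parameter types:

```cpp
struct ClearMask {
    bool r, g, b, a;
};

struct Scissor {
    int min_x, min_y, max_x, max_y;
};

bool MustPreserveContents(ClearMask mask, bool scissor_enabled, Scissor scissor, int width,
                          int height) {
    // A partially masked clear keeps the untouched channels alive.
    bool preserve = !mask.r || !mask.g || !mask.b || !mask.a;
    if (scissor_enabled) {
        // A scissor box smaller than the render target leaves a border intact.
        preserve |= scissor.min_x > 0 || scissor.min_y > 0;
        preserve |= scissor.max_x < width || scissor.max_y < height;
    }
    return preserve;
}
```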
| @@ -523,8 +491,9 @@ void RasterizerOpenGL::Clear() { | |||
| 523 | regs.clear_buffers.A) { | 491 | regs.clear_buffers.A) { |
| 524 | use_color = true; | 492 | use_color = true; |
| 525 | 493 | ||
| 526 | state_tracker.NotifyColorMask0(); | 494 | const GLuint index = regs.clear_buffers.RT; |
| 527 | glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, | 495 | state_tracker.NotifyColorMask(index); |
| 496 | glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, | ||
| 528 | regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); | 497 | regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); |
| 529 | 498 | ||
| 530 | // TODO(Rodrigo): Determine if clamping is used on clears | 499 | // TODO(Rodrigo): Determine if clamping is used on clears |
| @@ -557,15 +526,17 @@ void RasterizerOpenGL::Clear() { | |||
| 557 | state_tracker.NotifyScissor0(); | 526 | state_tracker.NotifyScissor0(); |
| 558 | glDisablei(GL_SCISSOR_TEST, 0); | 527 | glDisablei(GL_SCISSOR_TEST, 0); |
| 559 | } | 528 | } |
| 560 | |||
| 561 | UNIMPLEMENTED_IF(regs.clear_flags.viewport); | 529 | UNIMPLEMENTED_IF(regs.clear_flags.viewport); |
| 562 | 530 | ||
| 563 | ConfigureClearFramebuffer(use_color, use_depth || use_stencil); | 531 | { |
| 532 | auto lock = texture_cache.AcquireLock(); | ||
| 533 | texture_cache.UpdateRenderTargets(true); | ||
| 534 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||
| 535 | } | ||
| 564 | 536 | ||
| 565 | if (use_color) { | 537 | if (use_color) { |
| 566 | glClearBufferfv(GL_COLOR, 0, regs.clear_color); | 538 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); |
| 567 | } | 539 | } |
| 568 | |||
| 569 | if (use_depth && use_stencil) { | 540 | if (use_depth && use_stencil) { |
| 570 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); | 541 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); |
| 571 | } else if (use_depth) { | 542 | } else if (use_depth) { |
| @@ -622,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 622 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | 593 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); |
| 623 | 594 | ||
| 624 | // Prepare the vertex array. | 595 | // Prepare the vertex array. |
| 625 | const bool invalidated = buffer_cache.Map(buffer_size); | 596 | buffer_cache.Map(buffer_size); |
| 626 | |||
| 627 | if (invalidated) { | ||
| 628 | // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty | ||
| 629 | auto& dirty = maxwell3d.dirty.flags; | ||
| 630 | dirty[Dirty::VertexBuffers] = true; | ||
| 631 | for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { | ||
| 632 | dirty[index] = true; | ||
| 633 | } | ||
| 634 | } | ||
| 635 | 597 | ||
| 636 | // Prepare vertex array format. | 598 | // Prepare vertex array format. |
| 637 | SetupVertexFormat(); | 599 | SetupVertexFormat(); |
| @@ -655,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 655 | } | 617 | } |
| 656 | 618 | ||
| 657 | // Setup shaders and their used resources. | 619 | // Setup shaders and their used resources. |
| 658 | texture_cache.GuardSamplers(true); | 620 | auto lock = texture_cache.AcquireLock(); |
| 659 | const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); | 621 | SetupShaders(); |
| 660 | SetupShaders(primitive_mode); | ||
| 661 | texture_cache.GuardSamplers(false); | ||
| 662 | |||
| 663 | ConfigureFramebuffers(); | ||
| 664 | 622 | ||
| 665 | // Signal the buffer cache that we are not going to upload more things. | 623 | // Signal the buffer cache that we are not going to upload more things. |
| 666 | buffer_cache.Unmap(); | 624 | buffer_cache.Unmap(); |
| 667 | 625 | texture_cache.UpdateRenderTargets(false); | |
| 626 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||
| 668 | program_manager.BindGraphicsPipeline(); | 627 | program_manager.BindGraphicsPipeline(); |
| 669 | 628 | ||
| 670 | if (texture_cache.TextureBarrier()) { | 629 | const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); |
| 671 | glTextureBarrier(); | ||
| 672 | } | ||
| 673 | |||
| 674 | BeginTransformFeedback(primitive_mode); | 630 | BeginTransformFeedback(primitive_mode); |
| 675 | 631 | ||
| 676 | const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); | 632 | const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); |
| @@ -722,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 722 | buffer_cache.Acquire(); | 678 | buffer_cache.Acquire(); |
| 723 | current_cbuf = 0; | 679 | current_cbuf = 0; |
| 724 | 680 | ||
| 725 | auto kernel = shader_cache.GetComputeKernel(code_addr); | 681 | Shader* const kernel = shader_cache.GetComputeKernel(code_addr); |
| 726 | program_manager.BindCompute(kernel->GetHandle()); | ||
| 727 | 682 | ||
| 728 | SetupComputeTextures(kernel); | 683 | auto lock = texture_cache.AcquireLock(); |
| 729 | SetupComputeImages(kernel); | 684 | BindComputeTextures(kernel); |
| 730 | 685 | ||
| 731 | const std::size_t buffer_size = | 686 | const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * |
| 732 | Tegra::Engines::KeplerCompute::NumConstBuffers * | 687 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); |
| 733 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 734 | buffer_cache.Map(buffer_size); | 688 | buffer_cache.Map(buffer_size); |
| 735 | 689 | ||
| 736 | SetupComputeConstBuffers(kernel); | 690 | SetupComputeConstBuffers(kernel); |
| @@ -739,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 739 | buffer_cache.Unmap(); | 693 | buffer_cache.Unmap(); |
| 740 | 694 | ||
| 741 | const auto& launch_desc = kepler_compute.launch_description; | 695 | const auto& launch_desc = kepler_compute.launch_description; |
| 742 | program_manager.BindCompute(kernel->GetHandle()); | ||
| 743 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | 696 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); |
| 744 | ++num_queued_commands; | 697 | ++num_queued_commands; |
| 745 | } | 698 | } |
| @@ -760,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | |||
| 760 | if (addr == 0 || size == 0) { | 713 | if (addr == 0 || size == 0) { |
| 761 | return; | 714 | return; |
| 762 | } | 715 | } |
| 763 | texture_cache.FlushRegion(addr, size); | 716 | { |
| 717 | auto lock = texture_cache.AcquireLock(); | ||
| 718 | texture_cache.DownloadMemory(addr, size); | ||
| 719 | } | ||
| 764 | buffer_cache.FlushRegion(addr, size); | 720 | buffer_cache.FlushRegion(addr, size); |
| 765 | query_cache.FlushRegion(addr, size); | 721 | query_cache.FlushRegion(addr, size); |
| 766 | } | 722 | } |
| @@ -769,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { | |||
| 769 | if (!Settings::IsGPULevelHigh()) { | 725 | if (!Settings::IsGPULevelHigh()) { |
| 770 | return buffer_cache.MustFlushRegion(addr, size); | 726 | return buffer_cache.MustFlushRegion(addr, size); |
| 771 | } | 727 | } |
| 772 | return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); | 728 | return texture_cache.IsRegionGpuModified(addr, size) || |
| 729 | buffer_cache.MustFlushRegion(addr, size); | ||
| 773 | } | 730 | } |
| 774 | 731 | ||
| 775 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | 732 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { |
| @@ -777,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | |||
| 777 | if (addr == 0 || size == 0) { | 734 | if (addr == 0 || size == 0) { |
| 778 | return; | 735 | return; |
| 779 | } | 736 | } |
| 780 | texture_cache.InvalidateRegion(addr, size); | 737 | { |
| 738 | auto lock = texture_cache.AcquireLock(); | ||
| 739 | texture_cache.WriteMemory(addr, size); | ||
| 740 | } | ||
| 781 | shader_cache.InvalidateRegion(addr, size); | 741 | shader_cache.InvalidateRegion(addr, size); |
| 782 | buffer_cache.InvalidateRegion(addr, size); | 742 | buffer_cache.InvalidateRegion(addr, size); |
| 783 | query_cache.InvalidateRegion(addr, size); | 743 | query_cache.InvalidateRegion(addr, size); |
| @@ -788,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
| 788 | if (addr == 0 || size == 0) { | 748 | if (addr == 0 || size == 0) { |
| 789 | return; | 749 | return; |
| 790 | } | 750 | } |
| 791 | texture_cache.OnCPUWrite(addr, size); | 751 | { |
| 752 | auto lock = texture_cache.AcquireLock(); | ||
| 753 | texture_cache.WriteMemory(addr, size); | ||
| 754 | } | ||
| 792 | shader_cache.OnCPUWrite(addr, size); | 755 | shader_cache.OnCPUWrite(addr, size); |
| 793 | buffer_cache.OnCPUWrite(addr, size); | 756 | buffer_cache.OnCPUWrite(addr, size); |
| 794 | } | 757 | } |
| 795 | 758 | ||
| 796 | void RasterizerOpenGL::SyncGuestHost() { | 759 | void RasterizerOpenGL::SyncGuestHost() { |
| 797 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 760 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 798 | texture_cache.SyncGuestHost(); | ||
| 799 | buffer_cache.SyncGuestHost(); | 761 | buffer_cache.SyncGuestHost(); |
| 800 | shader_cache.SyncGuestHost(); | 762 | shader_cache.SyncGuestHost(); |
| 801 | } | 763 | } |
| 802 | 764 | ||
| 765 | void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | ||
| 766 | { | ||
| 767 | auto lock = texture_cache.AcquireLock(); | ||
| 768 | texture_cache.UnmapMemory(addr, size); | ||
| 769 | } | ||
| 770 | buffer_cache.OnCPUWrite(addr, size); | ||
| 771 | shader_cache.OnCPUWrite(addr, size); | ||
| 772 | } | ||
| 773 | |||
| 803 | void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { | 774 | void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { |
| 804 | if (!gpu.IsAsync()) { | 775 | if (!gpu.IsAsync()) { |
| 805 | gpu_memory.Write<u32>(addr, value); | 776 | gpu_memory.Write<u32>(addr, value); |
| @@ -841,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() { | |||
| 841 | GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); | 812 | GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); |
| 842 | } | 813 | } |
| 843 | 814 | ||
| 815 | void RasterizerOpenGL::FragmentBarrier() { | ||
| 816 | glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); | ||
| 817 | } | ||
| 818 | |||
| 819 | void RasterizerOpenGL::TiledCacheBarrier() { | ||
| 820 | glTextureBarrier(); | ||
| 821 | } | ||
| 822 | |||
| 844 | void RasterizerOpenGL::FlushCommands() { | 823 | void RasterizerOpenGL::FlushCommands() { |
| 845 | // Only flush when we have commands queued to OpenGL. | 824 | // Only flush when we have commands queued to OpenGL. |
| 846 | if (num_queued_commands == 0) { | 825 | if (num_queued_commands == 0) { |
| @@ -854,45 +833,95 @@ void RasterizerOpenGL::TickFrame() { | |||
| 854 | // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. | 833 | // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. |
| 855 | num_queued_commands = 0; | 834 | num_queued_commands = 0; |
| 856 | 835 | ||
| 836 | fence_manager.TickFrame(); | ||
| 857 | buffer_cache.TickFrame(); | 837 | buffer_cache.TickFrame(); |
| 838 | { | ||
| 839 | auto lock = texture_cache.AcquireLock(); | ||
| 840 | texture_cache.TickFrame(); | ||
| 841 | } | ||
| 858 | } | 842 | } |
| 859 | 843 | ||
| 860 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 844 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 861 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 845 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 862 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 846 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 863 | MICROPROFILE_SCOPE(OpenGL_Blits); | 847 | MICROPROFILE_SCOPE(OpenGL_Blits); |
| 864 | texture_cache.DoFermiCopy(src, dst, copy_config); | 848 | auto lock = texture_cache.AcquireLock(); |
| 849 | texture_cache.BlitImage(dst, src, copy_config); | ||
| 865 | return true; | 850 | return true; |
| 866 | } | 851 | } |
| 867 | 852 | ||
| 868 | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | 853 | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, |
| 869 | VAddr framebuffer_addr, u32 pixel_stride) { | 854 | VAddr framebuffer_addr, u32 pixel_stride) { |
| 870 | if (!framebuffer_addr) { | 855 | if (framebuffer_addr == 0) { |
| 871 | return {}; | 856 | return false; |
| 872 | } | 857 | } |
| 873 | |||
| 874 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 858 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 875 | 859 | ||
| 876 | const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; | 860 | auto lock = texture_cache.AcquireLock(); |
| 877 | if (!surface) { | 861 | ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; |
| 878 | return {}; | 862 | if (!image_view) { |
| 863 | return false; | ||
| 879 | } | 864 | } |
| 880 | |||
| 881 | // Verify that the cached surface is the same size and format as the requested framebuffer | 865 | // Verify that the cached surface is the same size and format as the requested framebuffer |
| 882 | const auto& params{surface->GetSurfaceParams()}; | 866 | // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); |
| 883 | const auto& pixel_format{ | 867 | // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); |
| 884 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; | ||
| 885 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); | ||
| 886 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); | ||
| 887 | 868 | ||
| 888 | if (params.pixel_format != pixel_format) { | 869 | screen_info.display_texture = image_view->Handle(ImageViewType::e2D); |
| 889 | LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different"); | 870 | screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); |
| 890 | } | 871 | return true; |
| 872 | } | ||
| 891 | 873 | ||
| 892 | screen_info.display_texture = surface->GetTexture(); | 874 | void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { |
| 893 | screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion; | 875 | image_view_indices.clear(); |
| 876 | sampler_handles.clear(); | ||
| 894 | 877 | ||
| 895 | return true; | 878 | texture_cache.SynchronizeComputeDescriptors(); |
| 879 | |||
| 880 | SetupComputeTextures(kernel); | ||
| 881 | SetupComputeImages(kernel); | ||
| 882 | |||
| 883 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 884 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 885 | |||
| 886 | program_manager.BindCompute(kernel->GetHandle()); | ||
| 887 | size_t image_view_index = 0; | ||
| 888 | size_t texture_index = 0; | ||
| 889 | size_t image_index = 0; | ||
| 890 | BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); | ||
| 891 | } | ||
| 892 | |||
| 893 | void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, | ||
| 894 | GLuint base_image, size_t& image_view_index, | ||
| 895 | size_t& texture_index, size_t& image_index) { | ||
| 896 | const GLuint* const samplers = sampler_handles.data() + texture_index; | ||
| 897 | const GLuint* const textures = texture_handles.data() + texture_index; | ||
| 898 | const GLuint* const images = image_handles.data() + image_index; | ||
| 899 | |||
| 900 | const size_t num_samplers = entries.samplers.size(); | ||
| 901 | for (const auto& sampler : entries.samplers) { | ||
| 902 | for (size_t i = 0; i < sampler.size; ++i) { | ||
| 903 | const ImageViewId image_view_id = image_view_ids[image_view_index++]; | ||
| 904 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 905 | const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); | ||
| 906 | texture_handles[texture_index++] = handle; | ||
| 907 | } | ||
| 908 | } | ||
| 909 | const size_t num_images = entries.images.size(); | ||
| 910 | for (size_t unit = 0; unit < num_images; ++unit) { | ||
| 911 | // TODO: Mark as modified | ||
| 912 | const ImageViewId image_view_id = image_view_ids[image_view_index++]; | ||
| 913 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 914 | const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); | ||
| 915 | image_handles[image_index] = handle; | ||
| 916 | ++image_index; | ||
| 917 | } | ||
| 918 | if (num_samplers > 0) { | ||
| 919 | glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers); | ||
| 920 | glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures); | ||
| 921 | } | ||
| 922 | if (num_images > 0) { | ||
| 923 | glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images); | ||
| 924 | } | ||
| 896 | } | 925 | } |
| 897 | 926 | ||
| 898 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { | 927 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { |
| @@ -999,7 +1028,6 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh | |||
| 999 | GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, | 1028 | GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, |
| 1000 | GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, | 1029 | GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, |
| 1001 | }; | 1030 | }; |
| 1002 | |||
| 1003 | const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; | 1031 | const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; |
| 1004 | const auto& entries{shader->GetEntries().global_memory_entries}; | 1032 | const auto& entries{shader->GetEntries().global_memory_entries}; |
| 1005 | 1033 | ||
| @@ -1056,77 +1084,53 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e | |||
| 1056 | } | 1084 | } |
| 1057 | } | 1085 | } |
| 1058 | 1086 | ||
| 1059 | void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { | 1087 | void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { |
| 1060 | MICROPROFILE_SCOPE(OpenGL_Texture); | 1088 | const bool via_header_index = |
| 1061 | u32 binding = device.GetBaseBindings(stage_index).sampler; | 1089 | maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1062 | for (const auto& entry : shader->GetEntries().samplers) { | 1090 | for (const auto& entry : shader->GetEntries().samplers) { |
| 1063 | const auto shader_type = static_cast<ShaderType>(stage_index); | 1091 | const auto shader_type = static_cast<ShaderType>(stage_index); |
| 1064 | for (std::size_t i = 0; i < entry.size; ++i) { | 1092 | for (size_t index = 0; index < entry.size; ++index) { |
| 1065 | const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); | 1093 | const auto handle = |
| 1066 | SetupTexture(binding++, texture, entry); | 1094 | GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); |
| 1095 | const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); | ||
| 1096 | sampler_handles.push_back(sampler->Handle()); | ||
| 1097 | image_view_indices.push_back(handle.image); | ||
| 1067 | } | 1098 | } |
| 1068 | } | 1099 | } |
| 1069 | } | 1100 | } |
| 1070 | 1101 | ||
| 1071 | void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { | 1102 | void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { |
| 1072 | MICROPROFILE_SCOPE(OpenGL_Texture); | 1103 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1073 | u32 binding = 0; | ||
| 1074 | for (const auto& entry : kernel->GetEntries().samplers) { | 1104 | for (const auto& entry : kernel->GetEntries().samplers) { |
| 1075 | for (std::size_t i = 0; i < entry.size; ++i) { | 1105 | for (size_t i = 0; i < entry.size; ++i) { |
| 1076 | const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i); | 1106 | const auto handle = |
| 1077 | SetupTexture(binding++, texture, entry); | 1107 | GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); |
| 1108 | const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | ||
| 1109 | sampler_handles.push_back(sampler->Handle()); | ||
| 1110 | image_view_indices.push_back(handle.image); | ||
| 1078 | } | 1111 | } |
| 1079 | } | 1112 | } |
| 1080 | } | 1113 | } |
| 1081 | 1114 | ||
| 1082 | void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | 1115 | void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { |
| 1083 | const SamplerEntry& entry) { | 1116 | const bool via_header_index = |
| 1084 | const auto view = texture_cache.GetTextureSurface(texture.tic, entry); | 1117 | maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1085 | if (!view) { | ||
| 1086 | // Can occur when texture addr is null or its memory is unmapped/invalid | ||
| 1087 | glBindSampler(binding, 0); | ||
| 1088 | glBindTextureUnit(binding, 0); | ||
| 1089 | return; | ||
| 1090 | } | ||
| 1091 | const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source, | ||
| 1092 | texture.tic.z_source, texture.tic.w_source); | ||
| 1093 | glBindTextureUnit(binding, handle); | ||
| 1094 | if (!view->GetSurfaceParams().IsBuffer()) { | ||
| 1095 | glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); | ||
| 1096 | } | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) { | ||
| 1100 | u32 binding = device.GetBaseBindings(stage_index).image; | ||
| 1101 | for (const auto& entry : shader->GetEntries().images) { | 1118 | for (const auto& entry : shader->GetEntries().images) { |
| 1102 | const auto shader_type = static_cast<ShaderType>(stage_index); | 1119 | const auto shader_type = static_cast<ShaderType>(stage_index); |
| 1103 | const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; | 1120 | const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); |
| 1104 | SetupImage(binding++, tic, entry); | 1121 | image_view_indices.push_back(handle.image); |
| 1105 | } | 1122 | } |
| 1106 | } | 1123 | } |
| 1107 | 1124 | ||
| 1108 | void RasterizerOpenGL::SetupComputeImages(Shader* shader) { | 1125 | void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { |
| 1109 | u32 binding = 0; | 1126 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1110 | for (const auto& entry : shader->GetEntries().images) { | 1127 | for (const auto& entry : shader->GetEntries().images) { |
| 1111 | const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic; | 1128 | const auto handle = |
| 1112 | SetupImage(binding++, tic, entry); | 1129 | GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); |
| 1130 | image_view_indices.push_back(handle.image); | ||
| 1113 | } | 1131 | } |
| 1114 | } | 1132 | } |
| 1115 | 1133 | ||
| 1116 | void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, | ||
| 1117 | const ImageEntry& entry) { | ||
| 1118 | const auto view = texture_cache.GetImageSurface(tic, entry); | ||
| 1119 | if (!view) { | ||
| 1120 | glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); | ||
| 1121 | return; | ||
| 1122 | } | ||
| 1123 | if (entry.is_written) { | ||
| 1124 | view->MarkAsModified(texture_cache.Tick()); | ||
| 1125 | } | ||
| 1126 | const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | ||
| 1127 | glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat()); | ||
| 1128 | } | ||
| 1129 | |||
| 1130 | void RasterizerOpenGL::SyncViewport() { | 1134 | void RasterizerOpenGL::SyncViewport() { |
| 1131 | auto& flags = maxwell3d.dirty.flags; | 1135 | auto& flags = maxwell3d.dirty.flags; |
| 1132 | const auto& regs = maxwell3d.regs; | 1136 | const auto& regs = maxwell3d.regs; |
| @@ -1526,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() { | |||
| 1526 | flags[Dirty::PointSize] = false; | 1530 | flags[Dirty::PointSize] = false; |
| 1527 | 1531 | ||
| 1528 | oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); | 1532 | oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); |
| 1533 | oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); | ||
| 1529 | 1534 | ||
| 1530 | if (maxwell3d.regs.vp_point_size.enable) { | ||
| 1531 | // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled. | ||
| 1532 | glEnable(GL_PROGRAM_POINT_SIZE); | ||
| 1533 | return; | ||
| 1534 | } | ||
| 1535 | |||
| 1536 | // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid | ||
| 1537 | // in OpenGL). | ||
| 1538 | glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); | 1535 | glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); |
| 1539 | glDisable(GL_PROGRAM_POINT_SIZE); | ||
| 1540 | } | 1536 | } |
| 1541 | 1537 | ||
| 1542 | void RasterizerOpenGL::SyncLineState() { | 1538 | void RasterizerOpenGL::SyncLineState() { |
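Note on the rework above: the draw and compute paths no longer bind textures one unit at a time; sampler, texture, and image handles are staged into flat arrays and flushed in bulk through the ARB_multi_bind entry points (glBindSamplers, glBindTextures, glBindImageTextures). A minimal sketch of the batched idiom, assuming the handle variables are already populated (their names here are illustrative, not from the patch):

    // One driver call binds N consecutive units instead of N separate calls.
    const std::array<GLuint, 3> textures{tex_a, tex_b, tex_c};
    const std::array<GLuint, 3> samplers{smp_a, smp_b, smp_c};
    const std::array<GLuint, 2> images{img_a, img_b};
    glBindTextures(0, static_cast<GLsizei>(textures.size()), textures.data());
    glBindSamplers(0, static_cast<GLsizei>(samplers.size()), samplers.data());
    glBindImageTextures(0, static_cast<GLsizei>(images.size()), images.data());

A zero handle in any slot unbinds that unit, so the gather step can leave gaps without special-casing them.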
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index de28cff15..82e03e677 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -7,12 +7,13 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <atomic> | 8 | #include <atomic> |
| 9 | #include <cstddef> | 9 | #include <cstddef> |
| 10 | #include <map> | ||
| 11 | #include <memory> | 10 | #include <memory> |
| 12 | #include <optional> | 11 | #include <optional> |
| 13 | #include <tuple> | 12 | #include <tuple> |
| 14 | #include <utility> | 13 | #include <utility> |
| 15 | 14 | ||
| 15 | #include <boost/container/static_vector.hpp> | ||
| 16 | |||
| 16 | #include <glad/glad.h> | 17 | #include <glad/glad.h> |
| 17 | 18 | ||
| 18 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| @@ -23,16 +24,14 @@ | |||
| 23 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 24 | #include "video_core/renderer_opengl/gl_device.h" | 25 | #include "video_core/renderer_opengl/gl_device.h" |
| 25 | #include "video_core/renderer_opengl/gl_fence_manager.h" | 26 | #include "video_core/renderer_opengl/gl_fence_manager.h" |
| 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | ||
| 27 | #include "video_core/renderer_opengl/gl_query_cache.h" | 27 | #include "video_core/renderer_opengl/gl_query_cache.h" |
| 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 29 | #include "video_core/renderer_opengl/gl_sampler_cache.h" | ||
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 29 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 31 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 30 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 32 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 31 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 33 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 32 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 33 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | ||
| 34 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 34 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 35 | #include "video_core/renderer_opengl/utils.h" | ||
| 36 | #include "video_core/shader/async_shaders.h" | 35 | #include "video_core/shader/async_shaders.h" |
| 37 | #include "video_core/textures/texture.h" | 36 | #include "video_core/textures/texture.h" |
| 38 | 37 | ||
| @@ -51,7 +50,7 @@ class MemoryManager; | |||
| 51 | namespace OpenGL { | 50 | namespace OpenGL { |
| 52 | 51 | ||
| 53 | struct ScreenInfo; | 52 | struct ScreenInfo; |
| 54 | struct DrawParameters; | 53 | struct ShaderEntries; |
| 55 | 54 | ||
| 56 | struct BindlessSSBO { | 55 | struct BindlessSSBO { |
| 57 | GLuint64EXT address; | 56 | GLuint64EXT address; |
| @@ -79,15 +78,18 @@ public: | |||
| 79 | void InvalidateRegion(VAddr addr, u64 size) override; | 78 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 80 | void OnCPUWrite(VAddr addr, u64 size) override; | 79 | void OnCPUWrite(VAddr addr, u64 size) override; |
| 81 | void SyncGuestHost() override; | 80 | void SyncGuestHost() override; |
| 81 | void UnmapMemory(VAddr addr, u64 size) override; | ||
| 82 | void SignalSemaphore(GPUVAddr addr, u32 value) override; | 82 | void SignalSemaphore(GPUVAddr addr, u32 value) override; |
| 83 | void SignalSyncPoint(u32 value) override; | 83 | void SignalSyncPoint(u32 value) override; |
| 84 | void ReleaseFences() override; | 84 | void ReleaseFences() override; |
| 85 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 85 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 86 | void WaitForIdle() override; | 86 | void WaitForIdle() override; |
| 87 | void FragmentBarrier() override; | ||
| 88 | void TiledCacheBarrier() override; | ||
| 87 | void FlushCommands() override; | 89 | void FlushCommands() override; |
| 88 | void TickFrame() override; | 90 | void TickFrame() override; |
| 89 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 91 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 90 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 92 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 91 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 93 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| 92 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 94 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 93 | u32 pixel_stride) override; | 95 | u32 pixel_stride) override; |
| @@ -108,11 +110,14 @@ public: | |||
| 108 | } | 110 | } |
| 109 | 111 | ||
| 110 | private: | 112 | private: |
| 111 | /// Configures the color and depth framebuffer states. | 113 | static constexpr size_t MAX_TEXTURES = 192; |
| 112 | void ConfigureFramebuffers(); | 114 | static constexpr size_t MAX_IMAGES = 48; |
| 115 | static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; | ||
| 116 | |||
| 117 | void BindComputeTextures(Shader* kernel); | ||
| 113 | 118 | ||
| 114 | /// Configures the color and depth framebuffer for clearing. | 119 | void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, |
| 115 | void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); | 120 | size_t& image_view_index, size_t& texture_index, size_t& image_index); |
| 116 | 121 | ||
| 117 | /// Configures the current constbuffers to use for the draw command. | 122 | /// Configures the current constbuffers to use for the draw command. |
| 118 | void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); | 123 | void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); |
| @@ -136,23 +141,16 @@ private: | |||
| 136 | size_t size, BindlessSSBO* ssbo); | 141 | size_t size, BindlessSSBO* ssbo); |
| 137 | 142 | ||
| 138 | /// Configures the current textures to use for the draw command. | 143 | /// Configures the current textures to use for the draw command. |
| 139 | void SetupDrawTextures(std::size_t stage_index, Shader* shader); | 144 | void SetupDrawTextures(const Shader* shader, size_t stage_index); |
| 140 | 145 | ||
| 141 | /// Configures the textures used in a compute shader. | 146 | /// Configures the textures used in a compute shader. |
| 142 | void SetupComputeTextures(Shader* kernel); | 147 | void SetupComputeTextures(const Shader* kernel); |
| 143 | |||
| 144 | /// Configures a texture. | ||
| 145 | void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | ||
| 146 | const SamplerEntry& entry); | ||
| 147 | 148 | ||
| 148 | /// Configures images in a graphics shader. | 149 | /// Configures images in a graphics shader. |
| 149 | void SetupDrawImages(std::size_t stage_index, Shader* shader); | 150 | void SetupDrawImages(const Shader* shader, size_t stage_index); |
| 150 | 151 | ||
| 151 | /// Configures images in a compute shader. | 152 | /// Configures images in a compute shader. |
| 152 | void SetupComputeImages(Shader* shader); | 153 | void SetupComputeImages(const Shader* shader); |
| 153 | |||
| 154 | /// Configures an image. | ||
| 155 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); | ||
| 156 | 154 | ||
| 157 | /// Syncs the viewport and depth range to match the guest state | 155 | /// Syncs the viewport and depth range to match the guest state |
| 158 | void SyncViewport(); | 156 | void SyncViewport(); |
| @@ -227,9 +225,6 @@ private: | |||
| 227 | /// End a transform feedback | 225 | /// End a transform feedback |
| 228 | void EndTransformFeedback(); | 226 | void EndTransformFeedback(); |
| 229 | 227 | ||
| 230 | /// Check for extension that are not strictly required but are needed for correct emulation | ||
| 231 | void CheckExtensions(); | ||
| 232 | |||
| 233 | std::size_t CalculateVertexArraysSize() const; | 228 | std::size_t CalculateVertexArraysSize() const; |
| 234 | 229 | ||
| 235 | std::size_t CalculateIndexBufferSize() const; | 230 | std::size_t CalculateIndexBufferSize() const; |
| @@ -242,7 +237,7 @@ private: | |||
| 242 | 237 | ||
| 243 | GLintptr SetupIndexBuffer(); | 238 | GLintptr SetupIndexBuffer(); |
| 244 | 239 | ||
| 245 | void SetupShaders(GLenum primitive_mode); | 240 | void SetupShaders(); |
| 246 | 241 | ||
| 247 | Tegra::GPU& gpu; | 242 | Tegra::GPU& gpu; |
| 248 | Tegra::Engines::Maxwell3D& maxwell3d; | 243 | Tegra::Engines::Maxwell3D& maxwell3d; |
| @@ -254,19 +249,21 @@ private: | |||
| 254 | ProgramManager& program_manager; | 249 | ProgramManager& program_manager; |
| 255 | StateTracker& state_tracker; | 250 | StateTracker& state_tracker; |
| 256 | 251 | ||
| 257 | TextureCacheOpenGL texture_cache; | 252 | OGLStreamBuffer stream_buffer; |
| 253 | TextureCacheRuntime texture_cache_runtime; | ||
| 254 | TextureCache texture_cache; | ||
| 258 | ShaderCacheOpenGL shader_cache; | 255 | ShaderCacheOpenGL shader_cache; |
| 259 | SamplerCacheOpenGL sampler_cache; | ||
| 260 | FramebufferCacheOpenGL framebuffer_cache; | ||
| 261 | QueryCache query_cache; | 256 | QueryCache query_cache; |
| 262 | OGLBufferCache buffer_cache; | 257 | OGLBufferCache buffer_cache; |
| 263 | FenceManagerOpenGL fence_manager; | 258 | FenceManagerOpenGL fence_manager; |
| 264 | 259 | ||
| 265 | VideoCommon::Shader::AsyncShaders async_shaders; | 260 | VideoCommon::Shader::AsyncShaders async_shaders; |
| 266 | 261 | ||
| 267 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 262 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |
| 268 | 263 | std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; | |
| 269 | GLint vertex_binding = 0; | 264 | boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; |
| 265 | std::array<GLuint, MAX_TEXTURES> texture_handles; | ||
| 266 | std::array<GLuint, MAX_IMAGES> image_handles; | ||
| 270 | 267 | ||
| 271 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | 268 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> |
| 272 | transform_feedback_buffers; | 269 | transform_feedback_buffers; |
| @@ -280,7 +277,7 @@ private: | |||
| 280 | std::size_t current_cbuf = 0; | 277 | std::size_t current_cbuf = 0; |
| 281 | OGLBuffer unified_uniform_buffer; | 278 | OGLBuffer unified_uniform_buffer; |
| 282 | 279 | ||
| 283 | /// Number of commands queued to the OpenGL driver. Reseted on flush. | 280 | /// Number of commands queued to the OpenGL driver. Reset on flush. |
| 284 | std::size_t num_queued_commands = 0; | 281 | std::size_t num_queued_commands = 0; |
| 285 | 282 | ||
| 286 | u32 last_clip_distance_mask = 0; | 283 | u32 last_clip_distance_mask = 0; |
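Note the new fixed-capacity staging containers above: boost::container::static_vector stores its elements inline (no heap allocation on the per-draw hot path) while keeping a vector-like interface, with hard bounds mirroring MAX_TEXTURES/MAX_IMAGES. A small illustration, assuming Boost.Container is available as in this tree:

    #include <boost/container/static_vector.hpp>

    // Capacity is a compile-time bound; the storage lives inside the object.
    boost::container::static_vector<GLuint, 192> handles;
    handles.push_back(some_handle);  // illustrative value; no allocation, ever
    // handles.data()/handles.size() give the contiguous span the
    // multi-bind calls consume.

Exceeding the declared capacity is an error rather than a reallocation, so the MAX_* constants must be true upper bounds on what a shader can reference.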
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 0ebcec427..0e34a0f20 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp | |||
| @@ -71,7 +71,7 @@ void OGLSampler::Create() { | |||
| 71 | return; | 71 | return; |
| 72 | 72 | ||
| 73 | MICROPROFILE_SCOPE(OpenGL_ResourceCreation); | 73 | MICROPROFILE_SCOPE(OpenGL_ResourceCreation); |
| 74 | glGenSamplers(1, &handle); | 74 | glCreateSamplers(1, &handle); |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | void OGLSampler::Release() { | 77 | void OGLSampler::Release() { |
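The glGenSamplers → glCreateSamplers switch matches the direct-state-access style used elsewhere in this renderer: for most object types glGen* only reserves a name and defers object creation until first bind, while the GL 4.5 glCreate* entry points return a fully initialized object immediately, so it can be configured and attached without ever being bound. A minimal sketch:

    GLuint sampler = 0;
    glCreateSamplers(1, &sampler);  // object exists with default state now
    glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);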
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp deleted file mode 100644 index 5c174879a..000000000 --- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp +++ /dev/null | |||
| @@ -1,52 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 7 | #include "video_core/renderer_opengl/gl_sampler_cache.h" | ||
| 8 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | ||
| 9 | |||
| 10 | namespace OpenGL { | ||
| 11 | |||
| 12 | SamplerCacheOpenGL::SamplerCacheOpenGL() = default; | ||
| 13 | |||
| 14 | SamplerCacheOpenGL::~SamplerCacheOpenGL() = default; | ||
| 15 | |||
| 16 | OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { | ||
| 17 | OGLSampler sampler; | ||
| 18 | sampler.Create(); | ||
| 19 | |||
| 20 | const GLuint sampler_id{sampler.handle}; | ||
| 21 | glSamplerParameteri( | ||
| 22 | sampler_id, GL_TEXTURE_MAG_FILTER, | ||
| 23 | MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None)); | ||
| 24 | glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, | ||
| 25 | MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter)); | ||
| 26 | glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u)); | ||
| 27 | glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v)); | ||
| 28 | glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p)); | ||
| 29 | glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, | ||
| 30 | tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE); | ||
| 31 | glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, | ||
| 32 | MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func)); | ||
| 33 | glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data()); | ||
| 34 | glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod()); | ||
| 35 | glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod()); | ||
| 36 | glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias()); | ||
| 37 | if (GLAD_GL_ARB_texture_filter_anisotropic) { | ||
| 38 | glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); | ||
| 39 | } else if (GLAD_GL_EXT_texture_filter_anisotropic) { | ||
| 40 | glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); | ||
| 41 | } else { | ||
| 42 | LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); | ||
| 43 | } | ||
| 44 | |||
| 45 | return sampler; | ||
| 46 | } | ||
| 47 | |||
| 48 | GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const { | ||
| 49 | return sampler.handle; | ||
| 50 | } | ||
| 51 | |||
| 52 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h deleted file mode 100644 index 34ee37f00..000000000 --- a/src/video_core/renderer_opengl/gl_sampler_cache.h +++ /dev/null | |||
| @@ -1,25 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <glad/glad.h> | ||
| 8 | |||
| 9 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 10 | #include "video_core/sampler_cache.h" | ||
| 11 | |||
| 12 | namespace OpenGL { | ||
| 13 | |||
| 14 | class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> { | ||
| 15 | public: | ||
| 16 | explicit SamplerCacheOpenGL(); | ||
| 17 | ~SamplerCacheOpenGL(); | ||
| 18 | |||
| 19 | protected: | ||
| 20 | OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; | ||
| 21 | |||
| 22 | GLuint ToSamplerType(const OGLSampler& sampler) const override; | ||
| 23 | }; | ||
| 24 | |||
| 25 | } // namespace OpenGL | ||
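With SamplerCacheOpenGL removed, TSC-to-GL sampler translation (the glSamplerParameter* sequence deleted above) moves into the shared texture cache runtime, and call sites resolve handles through it instead. The new lookup shape, as already seen in the rasterizer changes earlier in this diff:

    // Resolve a shader's TSC handle to a cached GL sampler object.
    const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
    sampler_handles.push_back(sampler->Handle());
    // Compute shaders use texture_cache.GetComputeSampler(handle.sampler).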
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index eabfdea5d..d4841fdb7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -27,7 +27,6 @@ | |||
| 27 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 27 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 28 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | 28 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |
| 29 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 29 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 30 | #include "video_core/renderer_opengl/utils.h" | ||
| 31 | #include "video_core/shader/memory_util.h" | 30 | #include "video_core/shader/memory_util.h" |
| 32 | #include "video_core/shader/registry.h" | 31 | #include "video_core/shader/registry.h" |
| 33 | #include "video_core/shader/shader_ir.h" | 32 | #include "video_core/shader/shader_ir.h" |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ccbdfe967..2e1fa252d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode; | |||
| 38 | using Tegra::Shader::PixelImap; | 38 | using Tegra::Shader::PixelImap; |
| 39 | using Tegra::Shader::Register; | 39 | using Tegra::Shader::Register; |
| 40 | using Tegra::Shader::TextureType; | 40 | using Tegra::Shader::TextureType; |
| 41 | using VideoCommon::Shader::BuildTransformFeedback; | ||
| 42 | using VideoCommon::Shader::Registry; | ||
| 43 | 41 | ||
| 44 | using namespace std::string_literals; | ||
| 45 | using namespace VideoCommon::Shader; | 42 | using namespace VideoCommon::Shader; |
| 43 | using namespace std::string_literals; | ||
| 46 | 44 | ||
| 47 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 45 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 48 | using Operation = const OperationNode&; | 46 | using Operation = const OperationNode&; |
| @@ -2753,11 +2751,11 @@ private: | |||
| 2753 | } | 2751 | } |
| 2754 | } | 2752 | } |
| 2755 | 2753 | ||
| 2756 | std::string GetSampler(const Sampler& sampler) const { | 2754 | std::string GetSampler(const SamplerEntry& sampler) const { |
| 2757 | return AppendSuffix(sampler.index, "sampler"); | 2755 | return AppendSuffix(sampler.index, "sampler"); |
| 2758 | } | 2756 | } |
| 2759 | 2757 | ||
| 2760 | std::string GetImage(const Image& image) const { | 2758 | std::string GetImage(const ImageEntry& image) const { |
| 2761 | return AppendSuffix(image.index, "image"); | 2759 | return AppendSuffix(image.index, "image"); |
| 2762 | } | 2760 | } |
| 2763 | 2761 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index c4ff47875..be68994bb 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -20,8 +20,8 @@ namespace OpenGL { | |||
| 20 | class Device; | 20 | class Device; |
| 21 | 21 | ||
| 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 23 | using SamplerEntry = VideoCommon::Shader::Sampler; | 23 | using SamplerEntry = VideoCommon::Shader::SamplerEntry; |
| 24 | using ImageEntry = VideoCommon::Shader::Image; | 24 | using ImageEntry = VideoCommon::Shader::ImageEntry; |
| 25 | 25 | ||
| 26 | class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { | 26 | class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { |
| 27 | public: | 27 | public: |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 691c6c79b..553e6e8d6 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() { | |||
| 83 | } | 83 | } |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | void ProgramManager::BindHostCompute(GLuint program) { | ||
| 87 | if (use_assembly_programs) { | ||
| 88 | glDisable(GL_COMPUTE_PROGRAM_NV); | ||
| 89 | } | ||
| 90 | glUseProgram(program); | ||
| 91 | is_graphics_bound = false; | ||
| 92 | } | ||
| 93 | |||
| 94 | void ProgramManager::RestoreGuestCompute() { | ||
| 95 | if (use_assembly_programs) { | ||
| 96 | glEnable(GL_COMPUTE_PROGRAM_NV); | ||
| 97 | glUseProgram(0); | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 86 | void ProgramManager::UseVertexShader(GLuint program) { | 101 | void ProgramManager::UseVertexShader(GLuint program) { |
| 87 | if (use_assembly_programs) { | 102 | if (use_assembly_programs) { |
| 88 | BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); | 103 | BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 950e0dfcb..ad42cce74 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -45,6 +45,12 @@ public: | |||
| 45 | /// Rewinds BindHostPipeline state changes. | 45 | /// Rewinds BindHostPipeline state changes. |
| 46 | void RestoreGuestPipeline(); | 46 | void RestoreGuestPipeline(); |
| 47 | 47 | ||
| 48 | /// Binds an OpenGL GLSL program object unsynchronized with the guest state. | ||
| 49 | void BindHostCompute(GLuint program); | ||
| 50 | |||
| 51 | /// Rewinds BindHostCompute state changes. | ||
| 52 | void RestoreGuestCompute(); | ||
| 53 | |||
| 48 | void UseVertexShader(GLuint program); | 54 | void UseVertexShader(GLuint program); |
| 49 | void UseGeometryShader(GLuint program); | 55 | void UseGeometryShader(GLuint program); |
| 50 | void UseFragmentShader(GLuint program); | 56 | void UseFragmentShader(GLuint program); |
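BindHostCompute/RestoreGuestCompute mirror the existing BindHostPipeline/RestoreGuestPipeline pair, but for compute work the renderer issues on its own behalf: on the NV assembly-shader path, the fixed-function compute enable must be toggled around the GLSL program. A sketch of the intended call pattern, where `program` is a compiled GLSL compute program and the dispatch sizes are placeholders:

    program_manager.BindHostCompute(program);  // glUseProgram + disable NV compute
    glDispatchCompute(groups_x, groups_y, 1);  // host-driven utility work
    program_manager.RestoreGuestCompute();     // hand state back to the guest path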
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 45f4fc565..60e6fa39f 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp | |||
| @@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} | |||
| 249 | } | 249 | } |
| 250 | } | 250 | } |
| 251 | 251 | ||
| 252 | void StateTracker::InvalidateStreamBuffer() { | ||
| 253 | flags[Dirty::VertexBuffers] = true; | ||
| 254 | for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { | ||
| 255 | flags[index] = true; | ||
| 256 | } | ||
| 257 | } | ||
| 258 | |||
| 252 | } // namespace OpenGL | 259 | } // namespace OpenGL |
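InvalidateStreamBuffer centralizes the reasoning that Draw() used to carry inline: once the stream buffer orphans its storage, every vertex-buffer binding recorded against the old allocation is stale, so all 32 per-buffer dirty bits (plus the aggregate flag) are raised and the next draw re-uploads them. The producer side, from OGLStreamBuffer::Map further down in this diff:

    if (buffer_pos + size > BUFFER_SIZE) {
        glInvalidateBufferData(gl_buffer.handle); // orphan the old storage
        state_tracker.InvalidateStreamBuffer();   // vertex buffers must rebind
        buffer_pos = 0;
    }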
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 9d127548f..574615d3c 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h | |||
| @@ -92,6 +92,8 @@ class StateTracker { | |||
| 92 | public: | 92 | public: |
| 93 | explicit StateTracker(Tegra::GPU& gpu); | 93 | explicit StateTracker(Tegra::GPU& gpu); |
| 94 | 94 | ||
| 95 | void InvalidateStreamBuffer(); | ||
| 96 | |||
| 95 | void BindIndexBuffer(GLuint new_index_buffer) { | 97 | void BindIndexBuffer(GLuint new_index_buffer) { |
| 96 | if (index_buffer == new_index_buffer) { | 98 | if (index_buffer == new_index_buffer) { |
| 97 | return; | 99 | return; |
| @@ -100,6 +102,14 @@ public: | |||
| 100 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer); | 102 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer); |
| 101 | } | 103 | } |
| 102 | 104 | ||
| 105 | void BindFramebuffer(GLuint new_framebuffer) { | ||
| 106 | if (framebuffer == new_framebuffer) { | ||
| 107 | return; | ||
| 108 | } | ||
| 109 | framebuffer = new_framebuffer; | ||
| 110 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); | ||
| 111 | } | ||
| 112 | |||
| 103 | void NotifyScreenDrawVertexArray() { | 113 | void NotifyScreenDrawVertexArray() { |
| 104 | flags[OpenGL::Dirty::VertexFormats] = true; | 114 | flags[OpenGL::Dirty::VertexFormats] = true; |
| 105 | flags[OpenGL::Dirty::VertexFormat0 + 0] = true; | 115 | flags[OpenGL::Dirty::VertexFormat0 + 0] = true; |
| @@ -129,9 +139,9 @@ public: | |||
| 129 | flags[OpenGL::Dirty::Scissor0] = true; | 139 | flags[OpenGL::Dirty::Scissor0] = true; |
| 130 | } | 140 | } |
| 131 | 141 | ||
| 132 | void NotifyColorMask0() { | 142 | void NotifyColorMask(size_t index) { |
| 133 | flags[OpenGL::Dirty::ColorMasks] = true; | 143 | flags[OpenGL::Dirty::ColorMasks] = true; |
| 134 | flags[OpenGL::Dirty::ColorMask0] = true; | 144 | flags[OpenGL::Dirty::ColorMask0 + index] = true; |
| 135 | } | 145 | } |
| 136 | 146 | ||
| 137 | void NotifyBlend0() { | 147 | void NotifyBlend0() { |
| @@ -190,6 +200,7 @@ public: | |||
| 190 | private: | 200 | private: |
| 191 | Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; | 201 | Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; |
| 192 | 202 | ||
| 203 | GLuint framebuffer = 0; | ||
| 193 | GLuint index_buffer = 0; | 204 | GLuint index_buffer = 0; |
| 194 | }; | 205 | }; |
| 195 | 206 | ||
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 887995cf4..e0819cdf2 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/microprofile.h" | 10 | #include "common/microprofile.h" |
| 11 | #include "video_core/renderer_opengl/gl_device.h" | 11 | #include "video_core/renderer_opengl/gl_device.h" |
| 12 | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 13 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 13 | 14 | ||
| 14 | MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", | 15 | MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", |
| @@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", | |||
| 16 | 17 | ||
| 17 | namespace OpenGL { | 18 | namespace OpenGL { |
| 18 | 19 | ||
| 19 | OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) | 20 | OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_) |
| 20 | : buffer_size(size) { | 21 | : state_tracker{state_tracker_} { |
| 21 | gl_buffer.Create(); | 22 | gl_buffer.Create(); |
| 22 | 23 | ||
| 23 | GLsizeiptr allocate_size = size; | ||
| 24 | if (vertex_data_usage) { | ||
| 25 | // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer | ||
| 26 | // read position is near the end and is an out-of-bound access to the vertex buffer. This is | ||
| 27 | // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the | ||
| 28 | // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the | ||
| 29 | // crash. | ||
| 30 | allocate_size *= 2; | ||
| 31 | } | ||
| 32 | |||
| 33 | static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; | 24 | static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; |
| 34 | glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); | 25 | glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags); |
| 35 | mapped_ptr = static_cast<u8*>( | 26 | mapped_ptr = static_cast<u8*>( |
| 36 | glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); | 27 | glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); |
| 37 | 28 | ||
| 38 | if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { | 29 | if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { |
| 39 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); | 30 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); |
| @@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() { | |||
| 46 | gl_buffer.Release(); | 37 | gl_buffer.Release(); |
| 47 | } | 38 | } |
| 48 | 39 | ||
| 49 | std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { | 40 | std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { |
| 50 | ASSERT(size <= buffer_size); | 41 | ASSERT(size <= BUFFER_SIZE); |
| 51 | ASSERT(alignment <= buffer_size); | 42 | ASSERT(alignment <= BUFFER_SIZE); |
| 52 | mapped_size = size; | 43 | mapped_size = size; |
| 53 | 44 | ||
| 54 | if (alignment > 0) { | 45 | if (alignment > 0) { |
| 55 | buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment); | 46 | buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment); |
| 56 | } | 47 | } |
| 57 | 48 | ||
| 58 | bool invalidate = false; | 49 | if (buffer_pos + size > BUFFER_SIZE) { |
| 59 | if (buffer_pos + size > buffer_size) { | ||
| 60 | MICROPROFILE_SCOPE(OpenGL_StreamBuffer); | 50 | MICROPROFILE_SCOPE(OpenGL_StreamBuffer); |
| 61 | glInvalidateBufferData(gl_buffer.handle); | 51 | glInvalidateBufferData(gl_buffer.handle); |
| 52 | state_tracker.InvalidateStreamBuffer(); | ||
| 62 | 53 | ||
| 63 | buffer_pos = 0; | 54 | buffer_pos = 0; |
| 64 | invalidate = true; | ||
| 65 | } | 55 | } |
| 66 | 56 | ||
| 67 | return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); | 57 | return std::make_pair(mapped_ptr + buffer_pos, buffer_pos); |
| 68 | } | 58 | } |
| 69 | 59 | ||
| 70 | void OGLStreamBuffer::Unmap(GLsizeiptr size) { | 60 | void OGLStreamBuffer::Unmap(GLsizeiptr size) { |
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 307a67113..dd9cf67eb 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h | |||
| @@ -4,29 +4,31 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <tuple> | 7 | #include <utility> |
| 8 | |||
| 8 | #include <glad/glad.h> | 9 | #include <glad/glad.h> |
| 10 | |||
| 9 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 10 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 11 | 13 | ||
| 12 | namespace OpenGL { | 14 | namespace OpenGL { |
| 13 | 15 | ||
| 14 | class Device; | 16 | class Device; |
| 17 | class StateTracker; | ||
| 15 | 18 | ||
| 16 | class OGLStreamBuffer : private NonCopyable { | 19 | class OGLStreamBuffer : private NonCopyable { |
| 17 | public: | 20 | public: |
| 18 | explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); | 21 | explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_); |
| 19 | ~OGLStreamBuffer(); | 22 | ~OGLStreamBuffer(); |
| 20 | 23 | ||
| 21 | /* | 24 | /* |
| 22 | * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes | 25 | * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes |
| 23 | * and the optional alignment requirement. | 26 | * and the optional alignment requirement. |
| 24 | * If the buffer is full, the whole buffer is reallocated, which invalidates old chunks. | 27 | * If the buffer is full, the whole buffer is reallocated, which invalidates old chunks. |
| 25 | * The return values are the pointer to the new chunk, the offset within the buffer, | 28 | * The return values are the pointer to the new chunk, and the offset within the buffer. |
| 26 | * and the invalidation flag for previous chunks. | ||
| 27 | * The actual used size must be specified on unmapping the chunk. | 29 | * The actual used size must be specified on unmapping the chunk. |
| 28 | */ | 30 | */ |
| 29 | std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0); | 31 | std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0); |
| 30 | 32 | ||
| 31 | void Unmap(GLsizeiptr size); | 33 | void Unmap(GLsizeiptr size); |
| 32 | 34 | ||
| @@ -39,15 +41,18 @@ public: | |||
| 39 | } | 41 | } |
| 40 | 42 | ||
| 41 | GLsizeiptr Size() const noexcept { | 43 | GLsizeiptr Size() const noexcept { |
| 42 | return buffer_size; | 44 | return BUFFER_SIZE; |
| 43 | } | 45 | } |
| 44 | 46 | ||
| 45 | private: | 47 | private: |
| 48 | static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024; | ||
| 49 | |||
| 50 | StateTracker& state_tracker; | ||
| 51 | |||
| 46 | OGLBuffer gl_buffer; | 52 | OGLBuffer gl_buffer; |
| 47 | 53 | ||
| 48 | GLuint64EXT gpu_address = 0; | 54 | GLuint64EXT gpu_address = 0; |
| 49 | GLintptr buffer_pos = 0; | 55 | GLintptr buffer_pos = 0; |
| 50 | GLsizeiptr buffer_size = 0; | ||
| 51 | GLsizeiptr mapped_size = 0; | 56 | GLsizeiptr mapped_size = 0; |
| 52 | u8* mapped_ptr = nullptr; | 57 | u8* mapped_ptr = nullptr; |
| 53 | }; | 58 | }; |
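With the invalidation flag gone from Map's return value, the contract is simpler: the caller gets a write pointer and the chunk's offset inside the fixed 256 MiB buffer, and wrap-around is handled internally (orphaning plus the state-tracker notification). A usage sketch; `buffer_handle` stands in for the stream buffer's GL name, since no accessor for it appears in this hunk:

    const auto [ptr, offset] = stream_buffer.Map(size, alignment);
    std::memcpy(ptr, src_data, size);  // stage the data
    stream_buffer.Unmap(size);         // flush exactly `size` bytes
    // Bind the chunk where it is needed, e.g. as a uniform buffer range:
    glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_handle, offset, size);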
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index daf352b50..546cb6d00 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -2,37 +2,60 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include <algorithm> |
| 6 | #include "common/bit_util.h" | 6 | #include <array> |
| 7 | #include "common/common_types.h" | 7 | #include <bit> |
| 8 | #include "common/microprofile.h" | 8 | #include <string> |
| 9 | #include "common/scope_exit.h" | 9 | |
| 10 | #include "core/core.h" | 10 | #include <glad/glad.h> |
| 11 | #include "video_core/morton.h" | 11 | |
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_device.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 14 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 14 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 15 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 15 | #include "video_core/renderer_opengl/utils.h" | 16 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 16 | #include "video_core/texture_cache/surface_base.h" | 17 | #include "video_core/renderer_opengl/util_shaders.h" |
| 18 | #include "video_core/surface.h" | ||
| 19 | #include "video_core/texture_cache/format_lookup_table.h" | ||
| 20 | #include "video_core/texture_cache/samples_helper.h" | ||
| 17 | #include "video_core/texture_cache/texture_cache.h" | 21 | #include "video_core/texture_cache/texture_cache.h" |
| 18 | #include "video_core/textures/convert.h" | 22 | #include "video_core/textures/decoders.h" |
| 19 | #include "video_core/textures/texture.h" | ||
| 20 | 23 | ||
| 21 | namespace OpenGL { | 24 | namespace OpenGL { |
| 22 | 25 | ||
| 23 | using Tegra::Texture::SwizzleSource; | 26 | namespace { |
| 24 | using VideoCore::MortonSwizzleMode; | ||
| 25 | 27 | ||
| 28 | using Tegra::Texture::SwizzleSource; | ||
| 29 | using Tegra::Texture::TextureMipmapFilter; | ||
| 30 | using Tegra::Texture::TextureType; | ||
| 31 | using Tegra::Texture::TICEntry; | ||
| 32 | using Tegra::Texture::TSCEntry; | ||
| 33 | using VideoCommon::CalculateLevelStrideAlignment; | ||
| 34 | using VideoCommon::ImageCopy; | ||
| 35 | using VideoCommon::ImageFlagBits; | ||
| 36 | using VideoCommon::ImageType; | ||
| 37 | using VideoCommon::NUM_RT; | ||
| 38 | using VideoCommon::SamplesLog2; | ||
| 39 | using VideoCommon::SwizzleParameters; | ||
| 40 | using VideoCore::Surface::BytesPerBlock; | ||
| 41 | using VideoCore::Surface::IsPixelFormatASTC; | ||
| 42 | using VideoCore::Surface::IsPixelFormatSRGB; | ||
| 43 | using VideoCore::Surface::MaxPixelFormat; | ||
| 26 | using VideoCore::Surface::PixelFormat; | 44 | using VideoCore::Surface::PixelFormat; |
| 27 | using VideoCore::Surface::SurfaceTarget; | ||
| 28 | using VideoCore::Surface::SurfaceType; | 45 | using VideoCore::Surface::SurfaceType; |
| 29 | 46 | ||
| 30 | MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); | 47 | struct CopyOrigin { |
| 31 | MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); | 48 | GLint level; |
| 32 | MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", | 49 | GLint x; |
| 33 | MP_RGB(128, 192, 128)); | 50 | GLint y; |
| 51 | GLint z; | ||
| 52 | }; | ||
| 34 | 53 | ||
| 35 | namespace { | 54 | struct CopyRegion { |
| 55 | GLsizei width; | ||
| 56 | GLsizei height; | ||
| 57 | GLsizei depth; | ||
| 58 | }; | ||
| 36 | 59 | ||
| 37 | struct FormatTuple { | 60 | struct FormatTuple { |
| 38 | GLenum internal_format; | 61 | GLenum internal_format; |
| @@ -40,7 +63,7 @@ struct FormatTuple { | |||
| 40 | GLenum type = GL_NONE; | 63 | GLenum type = GL_NONE; |
| 41 | }; | 64 | }; |
| 42 | 65 | ||
| 43 | constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ | 66 | constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ |
| 44 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM | 67 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM |
| 45 | {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM | 68 | {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM |
| 46 | {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT | 69 | {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT |
| @@ -103,72 +126,113 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format | |||
| 103 | {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM | 126 | {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM |
| 104 | {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM | 127 | {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM |
| 105 | {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM | 128 | {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM |
| 106 | // Compressed sRGB formats | 129 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB |
| 107 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB | 130 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB |
| 108 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB | 131 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB |
| 109 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB | 132 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB |
| 110 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB | 133 | {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM |
| 111 | {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM | 134 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB |
| 112 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB | 135 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB |
| 113 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB | 136 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB |
| 114 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB | 137 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB |
| 115 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB | 138 | {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM |
| 116 | {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM | 139 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB |
| 117 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB | 140 | {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM |
| 118 | {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM | 141 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB |
| 119 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB | 142 | {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM |
| 120 | {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM | 143 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB |
| 121 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB | 144 | {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM |
| 122 | {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM | 145 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB |
| 123 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB | 146 | {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM |
| 124 | {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM | 147 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB |
| 125 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB | 148 | {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM |
| 126 | {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM | 149 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB |
| 127 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB | 150 | {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM |
| 128 | {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM | 151 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB |
| 129 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB | 152 | {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT |
| 130 | {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT | 153 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT |
| 131 | 154 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM | |
| 132 | // Depth formats | 155 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT |
| 133 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT | 156 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM |
| 134 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM | ||
| 135 | |||
| 136 | // DepthStencil formats | ||
| 137 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT | ||
| 138 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM | ||
| 139 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, | 157 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, |
| 140 | GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT | 158 | GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT |
| 141 | }}; | 159 | }}; |
| 142 | 160 | ||
| 161 | constexpr std::array ACCELERATED_FORMATS{ | ||
| 162 | GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, | ||
| 163 | GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, | ||
| 164 | GL_RG16UI, GL_RG8UI, GL_R32UI, GL_R16UI, GL_R8UI, GL_RGBA32I, | ||
| 165 | GL_RGBA16I, GL_RGBA8I, GL_RG32I, GL_RG16I, GL_RG8I, GL_R32I, | ||
| 166 | GL_R16I, GL_R8I, GL_RGBA16, GL_RGB10_A2, GL_RGBA8, GL_RG16, | ||
| 167 | GL_RG8, GL_R16, GL_R8, GL_RGBA16_SNORM, GL_RGBA8_SNORM, GL_RG16_SNORM, | ||
| 168 | GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, | ||
| 169 | }; | ||
| 170 | |||
| 143 | const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { | 171 | const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { |
| 144 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); | 172 | ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size()); |
| 145 | return tex_format_tuples[static_cast<std::size_t>(pixel_format)]; | 173 | return FORMAT_TABLE[static_cast<size_t>(pixel_format)]; |
| 146 | } | 174 | } |
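Reviewer note: the tuple returned here drives the glTextureStorage*/glTextureSubImage* calls further down. A lookup sketch grounded in the table above; compressed rows leave format/type defaulted to GL_NONE, which the upload path later uses to select glCompressedTextureSubImage*:

    const FormatTuple& tuple = GetFormatTuple(PixelFormat::D32_FLOAT);
    // tuple.internal_format == GL_DEPTH_COMPONENT32F
    // tuple.format          == GL_DEPTH_COMPONENT
    // tuple.type            == GL_FLOAT
    const bool is_compressed = tuple.format == GL_NONE; // true for BC*/ASTC rows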
| 147 | 175 | ||
| 148 | GLenum GetTextureTarget(const SurfaceTarget& target) { | 176 | GLenum ImageTarget(const VideoCommon::ImageInfo& info) { |
| 149 | switch (target) { | 177 | switch (info.type) { |
| 150 | case SurfaceTarget::TextureBuffer: | 178 | case ImageType::e1D: |
| 179 | return GL_TEXTURE_1D_ARRAY; | ||
| 180 | case ImageType::e2D: | ||
| 181 | if (info.num_samples > 1) { | ||
| 182 | return GL_TEXTURE_2D_MULTISAMPLE_ARRAY; | ||
| 183 | } | ||
| 184 | return GL_TEXTURE_2D_ARRAY; | ||
| 185 | case ImageType::e3D: | ||
| 186 | return GL_TEXTURE_3D; | ||
| 187 | case ImageType::Linear: | ||
| 188 | return GL_TEXTURE_2D_ARRAY; | ||
| 189 | case ImageType::Buffer: | ||
| 151 | return GL_TEXTURE_BUFFER; | 190 | return GL_TEXTURE_BUFFER; |
| 152 | case SurfaceTarget::Texture1D: | 191 | } |
| 192 | UNREACHABLE_MSG("Invalid image type={}", info.type); | ||
| 193 | return GL_NONE; | ||
| 194 | } | ||
| 195 | |||
| 196 | GLenum ImageTarget(ImageViewType type, int num_samples = 1) { | ||
| 197 | const bool is_multisampled = num_samples > 1; | ||
| 198 | switch (type) { | ||
| 199 | case ImageViewType::e1D: | ||
| 153 | return GL_TEXTURE_1D; | 200 | return GL_TEXTURE_1D; |
| 154 | case SurfaceTarget::Texture2D: | 201 | case ImageViewType::e2D: |
| 155 | return GL_TEXTURE_2D; | 202 | return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; |
| 156 | case SurfaceTarget::Texture3D: | 203 | case ImageViewType::Cube: |
| 204 | return GL_TEXTURE_CUBE_MAP; | ||
| 205 | case ImageViewType::e3D: | ||
| 157 | return GL_TEXTURE_3D; | 206 | return GL_TEXTURE_3D; |
| 158 | case SurfaceTarget::Texture1DArray: | 207 | case ImageViewType::e1DArray: |
| 159 | return GL_TEXTURE_1D_ARRAY; | 208 | return GL_TEXTURE_1D_ARRAY; |
| 160 | case SurfaceTarget::Texture2DArray: | 209 | case ImageViewType::e2DArray: |
| 161 | return GL_TEXTURE_2D_ARRAY; | 210 | return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; |
| 162 | case SurfaceTarget::TextureCubemap: | 211 | case ImageViewType::CubeArray: |
| 163 | return GL_TEXTURE_CUBE_MAP; | ||
| 164 | case SurfaceTarget::TextureCubeArray: | ||
| 165 | return GL_TEXTURE_CUBE_MAP_ARRAY; | 212 | return GL_TEXTURE_CUBE_MAP_ARRAY; |
| 213 | case ImageViewType::Rect: | ||
| 214 | return GL_TEXTURE_RECTANGLE; | ||
| 215 | case ImageViewType::Buffer: | ||
| 216 | return GL_TEXTURE_BUFFER; | ||
| 166 | } | 217 | } |
| 167 | UNREACHABLE(); | 218 | UNREACHABLE_MSG("Invalid image view type={}", type); |
| 168 | return {}; | 219 | return GL_NONE; |
| 169 | } | 220 | } |
| 170 | 221 | ||
| 171 | GLint GetSwizzleSource(SwizzleSource source) { | 222 | GLenum TextureMode(PixelFormat format, bool is_first) { |
| 223 | switch (format) { | ||
| 224 | case PixelFormat::D24_UNORM_S8_UINT: | ||
| 225 | case PixelFormat::D32_FLOAT_S8_UINT: | ||
| 226 | return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; | ||
| 227 | case PixelFormat::S8_UINT_D24_UNORM: | ||
| 228 | return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; | ||
| 229 | default: | ||
| 230 | UNREACHABLE(); | ||
| 231 | return GL_DEPTH_COMPONENT; | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
| 235 | GLint Swizzle(SwizzleSource source) { | ||
| 172 | switch (source) { | 236 | switch (source) { |
| 173 | case SwizzleSource::Zero: | 237 | case SwizzleSource::Zero: |
| 174 | return GL_ZERO; | 238 | return GL_ZERO; |
| @@ -184,530 +248,813 @@ GLint GetSwizzleSource(SwizzleSource source) { | |||
| 184 | case SwizzleSource::OneFloat: | 248 | case SwizzleSource::OneFloat: |
| 185 | return GL_ONE; | 249 | return GL_ONE; |
| 186 | } | 250 | } |
| 187 | UNREACHABLE(); | 251 | UNREACHABLE_MSG("Invalid swizzle source={}", source); |
| 188 | return GL_NONE; | 252 | return GL_NONE; |
| 189 | } | 253 | } |
| 190 | 254 | ||
| 191 | GLenum GetComponent(PixelFormat format, bool is_first) { | 255 | GLenum AttachmentType(PixelFormat format) { |
| 192 | switch (format) { | 256 | switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) { |
| 193 | case PixelFormat::D24_UNORM_S8_UINT: | 257 | case SurfaceType::Depth: |
| 194 | case PixelFormat::D32_FLOAT_S8_UINT: | 258 | return GL_DEPTH_ATTACHMENT; |
| 195 | return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; | 259 | case SurfaceType::DepthStencil: |
| 196 | case PixelFormat::S8_UINT_D24_UNORM: | 260 | return GL_DEPTH_STENCIL_ATTACHMENT; |
| 197 | return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; | ||
| 198 | default: | 261 | default: |
| 199 | UNREACHABLE(); | 262 | UNIMPLEMENTED_MSG("Unimplemented type={}", type); |
| 200 | return GL_DEPTH_COMPONENT; | 263 | return GL_NONE; |
| 201 | } | 264 | } |
| 202 | } | 265 | } |
| 203 | 266 | ||
| 204 | void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { | 267 | [[nodiscard]] bool IsConverted(const Device& device, PixelFormat format, ImageType type) { |
| 205 | if (params.IsBuffer()) { | 268 | if (!device.HasASTC() && IsPixelFormatASTC(format)) { |
| 206 | return; | 269 | return true; |
| 207 | } | 270 | } |
| 208 | glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | 271 | switch (format) { |
| 209 | glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | 272 | case PixelFormat::BC4_UNORM: |
| 210 | glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); | 273 | case PixelFormat::BC5_UNORM: |
| 211 | glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); | 274 | return type == ImageType::e3D; |
| 212 | glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1)); | 275 | default: |
| 213 | if (params.num_levels == 1) { | 276 | break; |
| 214 | glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); | ||
| 215 | } | 277 | } |
| 278 | return false; | ||
| 216 | } | 279 | } |
| 217 | 280 | ||
| 218 | OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format, | 281 | [[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) { |
| 219 | OGLBuffer& texture_buffer) { | 282 | switch (value) { |
| 220 | OGLTexture texture; | 283 | case SwizzleSource::G: |
| 221 | texture.Create(target); | 284 | return SwizzleSource::R; |
| 285 | default: | ||
| 286 | return value; | ||
| 287 | } | ||
| 288 | } | ||
| 222 | 289 | ||
| 223 | switch (params.target) { | 290 | void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4> swizzle) { |
| 224 | case SurfaceTarget::Texture1D: | 291 | switch (format) { |
| 225 | glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); | 292 | case PixelFormat::D24_UNORM_S8_UINT: |
| 226 | break; | 293 | case PixelFormat::D32_FLOAT_S8_UINT: |
| 227 | case SurfaceTarget::TextureBuffer: | 294 | case PixelFormat::S8_UINT_D24_UNORM: |
| 228 | texture_buffer.Create(); | 295 | UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G); |
| 229 | glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), | 296 | glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, |
| 230 | nullptr, GL_DYNAMIC_STORAGE_BIT); | 297 | TextureMode(format, swizzle[0] == SwizzleSource::R)); |
| 231 | glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); | 298 | std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); |
| 232 | break; | 299 | break; |
| 233 | case SurfaceTarget::Texture2D: | 300 | default: |
| 234 | case SurfaceTarget::TextureCubemap: | ||
| 235 | glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, | ||
| 236 | params.height); | ||
| 237 | break; | 301 | break; |
| 238 | case SurfaceTarget::Texture3D: | 302 | } |
| 239 | case SurfaceTarget::Texture2DArray: | 303 | std::array<GLint, 4> gl_swizzle; |
| 240 | case SurfaceTarget::TextureCubeArray: | 304 | std::ranges::transform(swizzle, gl_swizzle.begin(), Swizzle); |
| 241 | glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width, | 305 | glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); |
| 242 | params.height, params.depth); | 306 | } |
| 307 | |||
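Reviewer note: for combined depth-stencil formats the first swizzle source selects which aspect is sampled; ApplySwizzle flips GL_DEPTH_STENCIL_TEXTURE_MODE accordingly, then rewrites G to R so the shader reads the single component GL actually returns. Usage sketch ('view_handle' is hypothetical):

    // Sample the stencil aspect of a D24S8 view.
    ApplySwizzle(view_handle, PixelFormat::D24_UNORM_S8_UINT,
                 {SwizzleSource::G, SwizzleSource::G,
                  SwizzleSource::G, SwizzleSource::OneFloat});
    // swizzle[0] == G, so TextureMode(..., false) yields GL_STENCIL_INDEX,
    // and ConvertGreenRed maps each G to R before GL_TEXTURE_SWIZZLE_RGBA.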
| 308 | [[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, | ||
| 309 | const VideoCommon::ImageInfo& info) { | ||
| 310 | // Disable accelerated uploads for now as they don't implement swizzled uploads | ||
| 311 | return false; | ||
| 312 | switch (info.type) { | ||
| 313 | case ImageType::e2D: | ||
| 314 | case ImageType::e3D: | ||
| 315 | case ImageType::Linear: | ||
| 243 | break; | 316 | break; |
| 244 | default: | 317 | default: |
| 245 | UNREACHABLE(); | 318 | return false; |
| 319 | } | ||
| 320 | const GLenum internal_format = GetFormatTuple(info.format).internal_format; | ||
| 321 | const auto& format_info = runtime.FormatInfo(info.type, internal_format); | ||
| 322 | if (format_info.is_compressed) { | ||
| 323 | return false; | ||
| 324 | } | ||
| 325 | if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) { | ||
| 326 | return false; | ||
| 246 | } | 327 | } |
| 328 | if (format_info.compatibility_by_size) { | ||
| 329 | return true; | ||
| 330 | } | ||
| 331 | const GLenum store_format = StoreFormat(BytesPerBlock(info.format)); | ||
| 332 | const GLenum store_class = runtime.FormatInfo(info.type, store_format).compatibility_class; | ||
| 333 | return format_info.compatibility_class == store_class; | ||
| 334 | } | ||
| 247 | 335 | ||
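Reviewer note: the compatibility_class/compatibility_by_size checks consume ARB_internalformat_query2 data cached by the runtime constructor further below. The same driver query works standalone; a minimal sketch assuming a GL 4.3+ context:

    // Two internal formats can alias (texture views, image load/store)
    // when the driver reports the same non-zero image compatibility class.
    bool SameCompatClass(GLenum target, GLenum format_a, GLenum format_b) {
        GLint class_a = 0;
        GLint class_b = 0;
        glGetInternalformativ(target, format_a, GL_IMAGE_COMPATIBILITY_CLASS, 1, &class_a);
        glGetInternalformativ(target, format_b, GL_IMAGE_COMPATIBILITY_CLASS, 1, &class_b);
        return class_a != GL_NONE && class_a == class_b;
    }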
| 248 | ApplyTextureDefaults(params, texture.handle); | 336 | [[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset, |
| 337 | VideoCommon::SubresourceLayers subresource, GLenum target) { | ||
| 338 | switch (target) { | ||
| 339 | case GL_TEXTURE_2D_ARRAY: | ||
| 340 | case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: | ||
| 341 | return CopyOrigin{ | ||
| 342 | .level = static_cast<GLint>(subresource.base_level), | ||
| 343 | .x = static_cast<GLint>(offset.x), | ||
| 344 | .y = static_cast<GLint>(offset.y), | ||
| 345 | .z = static_cast<GLint>(subresource.base_layer), | ||
| 346 | }; | ||
| 347 | case GL_TEXTURE_3D: | ||
| 348 | return CopyOrigin{ | ||
| 349 | .level = static_cast<GLint>(subresource.base_level), | ||
| 350 | .x = static_cast<GLint>(offset.x), | ||
| 351 | .y = static_cast<GLint>(offset.y), | ||
| 352 | .z = static_cast<GLint>(offset.z), | ||
| 353 | }; | ||
| 354 | default: | ||
| 355 | UNIMPLEMENTED_MSG("Unimplemented copy target={}", target); | ||
| 356 | return CopyOrigin{.level = 0, .x = 0, .y = 0, .z = 0}; | ||
| 357 | } | ||
| 358 | } | ||
| 249 | 359 | ||
| 250 | return texture; | 360 | [[nodiscard]] CopyRegion MakeCopyRegion(VideoCommon::Extent3D extent, |
| 361 | VideoCommon::SubresourceLayers dst_subresource, | ||
| 362 | GLenum target) { | ||
| 363 | switch (target) { | ||
| 364 | case GL_TEXTURE_2D_ARRAY: | ||
| 365 | case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: | ||
| 366 | return CopyRegion{ | ||
| 367 | .width = static_cast<GLsizei>(extent.width), | ||
| 368 | .height = static_cast<GLsizei>(extent.height), | ||
| 369 | .depth = static_cast<GLsizei>(dst_subresource.num_layers), | ||
| 370 | }; | ||
| 371 | case GL_TEXTURE_3D: | ||
| 372 | return CopyRegion{ | ||
| 373 | .width = static_cast<GLsizei>(extent.width), | ||
| 374 | .height = static_cast<GLsizei>(extent.height), | ||
| 375 | .depth = static_cast<GLsizei>(extent.depth), | ||
| 376 | }; | ||
| 377 | default: | ||
| 378 | UNIMPLEMENTED_MSG("Unimplemented copy target={}", target); | ||
| 379 | return CopyRegion{.width = 0, .height = 0, .depth = 0}; | ||
| 380 | } | ||
| 251 | } | 381 | } |
| 252 | 382 | ||
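Reviewer note: MakeCopyOrigin and MakeCopyRegion exist because glCopyImageSubData has a single fixed signature, so array layers must ride in the z/depth arguments for array targets. A sketch of the call CopyImage assembles below (handles and sizes hypothetical):

    glCopyImageSubData(src_tex, GL_TEXTURE_2D_ARRAY, /*srcLevel=*/0,
                       /*srcX=*/0, /*srcY=*/0, /*srcZ=*/src_base_layer,
                       dst_tex, GL_TEXTURE_2D_ARRAY, /*dstLevel=*/0,
                       /*dstX=*/0, /*dstY=*/0, /*dstZ=*/dst_base_layer,
                       width, height, /*depth=*/num_layers);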
| 253 | constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, | 383 | void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { |
| 254 | SwizzleSource w_source) { | 384 | if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { |
| 255 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | 385 | const GLuint texture = image_view->DefaultHandle(); |
| 256 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | 386 | glNamedFramebufferTexture(fbo, attachment, texture, 0); |
| 387 | return; | ||
| 388 | } | ||
| 389 | const GLuint texture = image_view->Handle(ImageViewType::e3D); | ||
| 390 | if (image_view->range.extent.layers > 1) { | ||
| 391 | // TODO: OpenGL doesn't support rendering to a fixed number of slices | ||
| 392 | glNamedFramebufferTexture(fbo, attachment, texture, 0); | ||
| 393 | } else { | ||
| 394 | const u32 slice = image_view->range.base.layer; | ||
| 395 | glNamedFramebufferTextureLayer(fbo, attachment, texture, 0, slice); | ||
| 396 | } | ||
| 257 | } | 397 | } |
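Reviewer note: the single-slice branch relies on glNamedFramebufferTextureLayer accepting one z-slice of a 3D texture just as it accepts one layer of an array texture. Sketch with hypothetical names:

    // Render to slice 'z' of a 3D texture (or layer 'z' of an array texture).
    glNamedFramebufferTextureLayer(fbo, GL_COLOR_ATTACHMENT0, tex3d,
                                   /*level=*/0, /*layer=*/z);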
| 258 | 398 | ||
| 259 | } // Anonymous namespace | 399 | } // Anonymous namespace |
| 260 | 400 | ||
| 261 | CachedSurface::CachedSurface(const GPUVAddr gpu_addr_, const SurfaceParams& params_, | 401 | ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_) |
| 262 | bool is_astc_supported_) | 402 | : span(map, size), sync{sync_}, handle{handle_} {} |
| 263 | : SurfaceBase<View>{gpu_addr_, params_, is_astc_supported_} { | ||
| 264 | if (is_converted) { | ||
| 265 | internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8; | ||
| 266 | format = GL_RGBA; | ||
| 267 | type = GL_UNSIGNED_BYTE; | ||
| 268 | } else { | ||
| 269 | const auto& tuple{GetFormatTuple(params.pixel_format)}; | ||
| 270 | internal_format = tuple.internal_format; | ||
| 271 | format = tuple.format; | ||
| 272 | type = tuple.type; | ||
| 273 | is_compressed = params.IsCompressed(); | ||
| 274 | } | ||
| 275 | target = GetTextureTarget(params.target); | ||
| 276 | texture = CreateTexture(params, target, internal_format, texture_buffer); | ||
| 277 | DecorateSurfaceName(); | ||
| 278 | 403 | ||
| 279 | u32 num_layers = 1; | 404 | ImageBufferMap::~ImageBufferMap() { |
| 280 | if (params.is_layered || params.target == SurfaceTarget::Texture3D) { | 405 | if (sync) { |
| 281 | num_layers = params.depth; | 406 | sync->Create(); |
| 282 | } | 407 | } |
| 283 | |||
| 284 | main_view = | ||
| 285 | CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true); | ||
| 286 | } | 408 | } |
| 287 | 409 | ||
| 288 | CachedSurface::~CachedSurface() = default; | 410 | TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, |
| 411 | StateTracker& state_tracker_) | ||
| 412 | : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) { | ||
| 413 | static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; | ||
| 414 | for (size_t i = 0; i < TARGETS.size(); ++i) { | ||
| 415 | const GLenum target = TARGETS[i]; | ||
| 416 | for (const FormatTuple& tuple : FORMAT_TABLE) { | ||
| 417 | const GLenum format = tuple.internal_format; | ||
| 418 | GLint compat_class; | ||
| 419 | GLint compat_type; | ||
| 420 | GLint is_compressed; | ||
| 421 | glGetInternalformativ(target, format, GL_IMAGE_COMPATIBILITY_CLASS, 1, &compat_class); | ||
| 422 | glGetInternalformativ(target, format, GL_IMAGE_FORMAT_COMPATIBILITY_TYPE, 1, | ||
| 423 | &compat_type); | ||
| 424 | glGetInternalformativ(target, format, GL_TEXTURE_COMPRESSED, 1, &is_compressed); | ||
| 425 | const FormatProperties properties{ | ||
| 426 | .compatibility_class = static_cast<GLenum>(compat_class), | ||
| 427 | .compatibility_by_size = compat_type == GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE, | ||
| 428 | .is_compressed = is_compressed == GL_TRUE, | ||
| 429 | }; | ||
| 430 | format_properties[i].emplace(format, properties); | ||
| 431 | } | ||
| 432 | } | ||
| 433 | has_broken_texture_view_formats = device.HasBrokenTextureViewFormats(); | ||
| 434 | |||
| 435 | null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); | ||
| 436 | null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); | ||
| 437 | null_image_3d.Create(GL_TEXTURE_3D); | ||
| 438 | null_image_rect.Create(GL_TEXTURE_RECTANGLE); | ||
| 439 | glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); | ||
| 440 | glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); | ||
| 441 | glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); | ||
| 442 | glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); | ||
| 443 | |||
| 444 | std::array<GLuint, 4> new_handles; | ||
| 445 | glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data()); | ||
| 446 | null_image_view_1d.handle = new_handles[0]; | ||
| 447 | null_image_view_2d.handle = new_handles[1]; | ||
| 448 | null_image_view_2d_array.handle = new_handles[2]; | ||
| 449 | null_image_view_cube.handle = new_handles[3]; | ||
| 450 | glTextureView(null_image_view_1d.handle, GL_TEXTURE_1D, null_image_1d_array.handle, GL_R8, 0, 1, | ||
| 451 | 0, 1); | ||
| 452 | glTextureView(null_image_view_2d.handle, GL_TEXTURE_2D, null_image_cube_array.handle, GL_R8, 0, | ||
| 453 | 1, 0, 1); | ||
| 454 | glTextureView(null_image_view_2d_array.handle, GL_TEXTURE_2D_ARRAY, | ||
| 455 | null_image_cube_array.handle, GL_R8, 0, 1, 0, 1); | ||
| 456 | glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, | ||
| 457 | GL_R8, 0, 1, 0, 6); | ||
| 458 | const std::array texture_handles{ | ||
| 459 | null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, | ||
| 460 | null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, | ||
| 461 | null_image_view_2d_array.handle, null_image_view_cube.handle, | ||
| 462 | }; | ||
| 463 | for (const GLuint handle : texture_handles) { | ||
| 464 | static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; | ||
| 465 | glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); | ||
| 466 | } | ||
| 467 | const auto set_view = [this](ImageViewType type, GLuint handle) { | ||
| 468 | if (device.HasDebuggingToolAttached()) { | ||
| 469 | const std::string name = fmt::format("NullImage {}", type); | ||
| 470 | glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); | ||
| 471 | } | ||
| 472 | null_image_views[static_cast<size_t>(type)] = handle; | ||
| 473 | }; | ||
| 474 | set_view(ImageViewType::e1D, null_image_view_1d.handle); | ||
| 475 | set_view(ImageViewType::e2D, null_image_view_2d.handle); | ||
| 476 | set_view(ImageViewType::Cube, null_image_view_cube.handle); | ||
| 477 | set_view(ImageViewType::e3D, null_image_3d.handle); | ||
| 478 | set_view(ImageViewType::e1DArray, null_image_1d_array.handle); | ||
| 479 | set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); | ||
| 480 | set_view(ImageViewType::CubeArray, null_image_cube_array.handle); | ||
| 481 | set_view(ImageViewType::Rect, null_image_rect.handle); | ||
| 482 | } | ||
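Reviewer note: the constructor's null-image setup reduces to one reusable pattern; a self-contained sketch, assuming GL 4.5 with ARB_texture_view:

    // 1x1 R8 storage aliased through glTextureView. Note glGenTextures:
    // glTextureView requires a name that has never been bound.
    GLuint storage = 0;
    glCreateTextures(GL_TEXTURE_2D, 1, &storage);
    glTextureStorage2D(storage, 1, GL_R8, 1, 1);

    GLuint view = 0;
    glGenTextures(1, &view);
    glTextureView(view, GL_TEXTURE_2D, storage, GL_R8, 0, 1, 0, 1);

    // Zero out every channel so sampling the null view is well defined.
    static constexpr GLint ZERO[4]{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
    glTextureParameteriv(view, GL_TEXTURE_SWIZZLE_RGBA, ZERO);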
| 289 | 483 | ||
| 290 | void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { | 484 | TextureCacheRuntime::~TextureCacheRuntime() = default; |
| 291 | MICROPROFILE_SCOPE(OpenGL_Texture_Download); | ||
| 292 | 485 | ||
| 293 | if (params.IsBuffer()) { | 486 | void TextureCacheRuntime::Finish() { |
| 294 | glGetNamedBufferSubData(texture_buffer.handle, 0, | 487 | glFinish(); |
| 295 | static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)), | 488 | } |
| 296 | staging_buffer.data()); | ||
| 297 | return; | ||
| 298 | } | ||
| 299 | 489 | ||
| 300 | SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); | 490 | ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { |
| 491 | return upload_buffers.RequestMap(size, true); | ||
| 492 | } | ||
| 301 | 493 | ||
| 302 | for (u32 level = 0; level < params.emulated_levels; ++level) { | 494 | ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { |
| 303 | glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); | 495 | return download_buffers.RequestMap(size, false); |
| 304 | glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); | 496 | } |
| 305 | const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); | ||
| 306 | 497 | ||
| 307 | u8* const mip_data = staging_buffer.data() + mip_offset; | 498 | void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, |
| 308 | const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); | 499 | std::span<const ImageCopy> copies) { |
| 309 | if (is_compressed) { | 500 | const GLuint dst_name = dst_image.Handle(); |
| 310 | glGetCompressedTextureImage(texture.handle, level, size, mip_data); | 501 | const GLuint src_name = src_image.Handle(); |
| 311 | } else { | 502 | const GLenum dst_target = ImageTarget(dst_image.info); |
| 312 | glGetTextureImage(texture.handle, level, format, type, size, mip_data); | 503 | const GLenum src_target = ImageTarget(src_image.info); |
| 313 | } | 504 | for (const ImageCopy& copy : copies) { |
| 505 | const auto src_origin = MakeCopyOrigin(copy.src_offset, copy.src_subresource, src_target); | ||
| 506 | const auto dst_origin = MakeCopyOrigin(copy.dst_offset, copy.dst_subresource, dst_target); | ||
| 507 | const auto region = MakeCopyRegion(copy.extent, copy.dst_subresource, dst_target); | ||
| 508 | glCopyImageSubData(src_name, src_target, src_origin.level, src_origin.x, src_origin.y, | ||
| 509 | src_origin.z, dst_name, dst_target, dst_origin.level, dst_origin.x, | ||
| 510 | dst_origin.y, dst_origin.z, region.width, region.height, region.depth); | ||
| 314 | } | 511 | } |
| 315 | } | 512 | } |
| 316 | 513 | ||
| 317 | void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { | 514 | bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { |
| 318 | MICROPROFILE_SCOPE(OpenGL_Texture_Upload); | 515 | if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { |
| 319 | SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); | 516 | return false; |
| 320 | for (u32 level = 0; level < params.emulated_levels; ++level) { | ||
| 321 | UploadTextureMipmap(level, staging_buffer); | ||
| 322 | } | 517 | } |
| 518 | return true; | ||
| 323 | } | 519 | } |
| 324 | 520 | ||
| 325 | void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) { | 521 | void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, |
| 326 | glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); | 522 | std::span<const ImageCopy> copies) { |
| 327 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); | 523 | if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { |
| 328 | 524 | ASSERT(src.info.type == ImageType::e3D); | |
| 329 | const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); | 525 | util_shaders.CopyBC4(dst, src, copies); |
| 330 | const u8* buffer{staging_buffer.data() + mip_offset}; | ||
| 331 | if (is_compressed) { | ||
| 332 | const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))}; | ||
| 333 | switch (params.target) { | ||
| 334 | case SurfaceTarget::Texture2D: | ||
| 335 | glCompressedTextureSubImage2D(texture.handle, level, 0, 0, | ||
| 336 | static_cast<GLsizei>(params.GetMipWidth(level)), | ||
| 337 | static_cast<GLsizei>(params.GetMipHeight(level)), | ||
| 338 | internal_format, image_size, buffer); | ||
| 339 | break; | ||
| 340 | case SurfaceTarget::Texture3D: | ||
| 341 | case SurfaceTarget::Texture2DArray: | ||
| 342 | case SurfaceTarget::TextureCubeArray: | ||
| 343 | glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0, | ||
| 344 | static_cast<GLsizei>(params.GetMipWidth(level)), | ||
| 345 | static_cast<GLsizei>(params.GetMipHeight(level)), | ||
| 346 | static_cast<GLsizei>(params.GetMipDepth(level)), | ||
| 347 | internal_format, image_size, buffer); | ||
| 348 | break; | ||
| 349 | case SurfaceTarget::TextureCubemap: { | ||
| 350 | const std::size_t host_layer_size{params.GetHostLayerSize(level)}; | ||
| 351 | for (std::size_t face = 0; face < params.depth; ++face) { | ||
| 352 | glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face), | ||
| 353 | static_cast<GLsizei>(params.GetMipWidth(level)), | ||
| 354 | static_cast<GLsizei>(params.GetMipHeight(level)), 1, | ||
| 355 | internal_format, | ||
| 356 | static_cast<GLsizei>(host_layer_size), buffer); | ||
| 357 | buffer += host_layer_size; | ||
| 358 | } | ||
| 359 | break; | ||
| 360 | } | ||
| 361 | default: | ||
| 362 | UNREACHABLE(); | ||
| 363 | } | ||
| 364 | } else { | 526 | } else { |
| 365 | switch (params.target) { | 527 | UNREACHABLE(); |
| 366 | case SurfaceTarget::Texture1D: | ||
| 367 | glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type, | ||
| 368 | buffer); | ||
| 369 | break; | ||
| 370 | case SurfaceTarget::TextureBuffer: | ||
| 371 | ASSERT(level == 0); | ||
| 372 | glNamedBufferSubData(texture_buffer.handle, 0, | ||
| 373 | params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer); | ||
| 374 | break; | ||
| 375 | case SurfaceTarget::Texture1DArray: | ||
| 376 | case SurfaceTarget::Texture2D: | ||
| 377 | glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level), | ||
| 378 | params.GetMipHeight(level), format, type, buffer); | ||
| 379 | break; | ||
| 380 | case SurfaceTarget::Texture3D: | ||
| 381 | case SurfaceTarget::Texture2DArray: | ||
| 382 | case SurfaceTarget::TextureCubeArray: | ||
| 383 | glTextureSubImage3D( | ||
| 384 | texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)), | ||
| 385 | static_cast<GLsizei>(params.GetMipHeight(level)), | ||
| 386 | static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer); | ||
| 387 | break; | ||
| 388 | case SurfaceTarget::TextureCubemap: | ||
| 389 | for (std::size_t face = 0; face < params.depth; ++face) { | ||
| 390 | glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face), | ||
| 391 | params.GetMipWidth(level), params.GetMipHeight(level), 1, | ||
| 392 | format, type, buffer); | ||
| 393 | buffer += params.GetHostLayerSize(level); | ||
| 394 | } | ||
| 395 | break; | ||
| 396 | default: | ||
| 397 | UNREACHABLE(); | ||
| 398 | } | ||
| 399 | } | 528 | } |
| 400 | } | 529 | } |
| 401 | 530 | ||
| 402 | void CachedSurface::DecorateSurfaceName() { | 531 | void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, |
| 403 | LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); | 532 | const std::array<Offset2D, 2>& dst_region, |
| 404 | } | 533 | const std::array<Offset2D, 2>& src_region, |
| 534 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 535 | Tegra::Engines::Fermi2D::Operation operation) { | ||
| 536 | state_tracker.NotifyScissor0(); | ||
| 537 | state_tracker.NotifyRasterizeEnable(); | ||
| 538 | state_tracker.NotifyFramebufferSRGB(); | ||
| 405 | 539 | ||
| 406 | void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) { | 540 | ASSERT(dst->BufferBits() == src->BufferBits()); |
| 407 | LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); | 541 | |
| 542 | glEnable(GL_FRAMEBUFFER_SRGB); | ||
| 543 | glDisable(GL_RASTERIZER_DISCARD); | ||
| 544 | glDisablei(GL_SCISSOR_TEST, 0); | ||
| 545 | |||
| 546 | const GLbitfield buffer_bits = dst->BufferBits(); | ||
| 547 | const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0; | ||
| 548 | const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear; | ||
| 549 | glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y, | ||
| 550 | src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y, | ||
| 551 | dst_region[1].x, dst_region[1].y, buffer_bits, | ||
| 552 | is_linear ? GL_LINEAR : GL_NEAREST); | ||
| 408 | } | 553 | } |
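Reviewer note: the is_linear computation encodes a GL constraint rather than a heuristic: glBlitNamedFramebuffer accepts GL_LINEAR only for pure color blits, and any depth or stencil bit forces GL_NEAREST. Restated as a sketch ('want_bilinear' is hypothetical):

    const bool color_only =
        (buffer_bits & (GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT)) == 0;
    const GLenum filter = color_only && want_bilinear ? GL_LINEAR : GL_NEAREST;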
| 409 | 554 | ||
| 410 | View CachedSurface::CreateView(const ViewParams& view_key) { | 555 | void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, |
| 411 | return CreateViewInner(view_key, false); | 556 | size_t buffer_offset, |
| 557 | std::span<const SwizzleParameters> swizzles) { | ||
| 558 | switch (image.info.type) { | ||
| 559 | case ImageType::e2D: | ||
| 560 | return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); | ||
| 561 | case ImageType::e3D: | ||
| 562 | return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); | ||
| 563 | case ImageType::Linear: | ||
| 564 | return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); | ||
| 565 | default: | ||
| 566 | UNREACHABLE(); | ||
| 567 | break; | ||
| 568 | } | ||
| 412 | } | 569 | } |
| 413 | 570 | ||
| 414 | View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) { | 571 | void TextureCacheRuntime::InsertUploadMemoryBarrier() { |
| 415 | auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy); | 572 | glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); |
| 416 | views[view_key] = view; | ||
| 417 | if (!is_proxy) | ||
| 418 | view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); | ||
| 419 | return view; | ||
| 420 | } | 573 | } |
| 421 | 574 | ||
| 422 | CachedSurfaceView::CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, | 575 | FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal_format) const { |
| 423 | bool is_proxy_) | 576 | switch (type) { |
| 424 | : ViewBase{params_}, surface{surface_}, format{surface_.internal_format}, | 577 | case ImageType::e1D: |
| 425 | target{GetTextureTarget(params_.target)}, is_proxy{is_proxy_} { | 578 | return format_properties[0].at(internal_format); |
| 426 | if (!is_proxy_) { | 579 | case ImageType::e2D: |
| 427 | main_view = CreateTextureView(); | 580 | case ImageType::Linear: |
| 581 | return format_properties[1].at(internal_format); | ||
| 582 | case ImageType::e3D: | ||
| 583 | return format_properties[2].at(internal_format); | ||
| 584 | default: | ||
| 585 | UNREACHABLE(); | ||
| 586 | return FormatProperties{}; | ||
| 428 | } | 587 | } |
| 429 | } | 588 | } |
| 430 | 589 | ||
| 431 | CachedSurfaceView::~CachedSurfaceView() = default; | 590 | TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) |
| 591 | : storage_flags{storage_flags_}, map_flags{map_flags_} {} | ||
| 432 | 592 | ||
| 433 | void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const { | 593 | TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default; |
| 434 | ASSERT(params.num_levels == 1); | ||
| 435 | 594 | ||
| 436 | if (params.target == SurfaceTarget::Texture3D) { | 595 | ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size, |
| 437 | if (params.num_layers > 1) { | 596 | bool insert_fence) { |
| 438 | ASSERT(params.base_layer == 0); | 597 | const size_t index = RequestBuffer(requested_size); |
| 439 | glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level); | 598 | OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; |
| 440 | } else { | 599 | return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync); |
| 441 | glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle, | 600 | } |
| 442 | params.base_level, params.base_layer); | 601 | |
| 443 | } | 602 | size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { |
| 444 | return; | 603 | if (const std::optional<size_t> index = FindBuffer(requested_size); index) { |
| 604 | return *index; | ||
| 445 | } | 605 | } |
| 446 | 606 | ||
| 447 | if (params.num_layers > 1) { | 607 | OGLBuffer& buffer = buffers.emplace_back(); |
| 448 | UNIMPLEMENTED_IF(params.base_layer != 0); | 608 | buffer.Create(); |
| 449 | glFramebufferTexture(fb_target, attachment, GetTexture(), 0); | 609 | glNamedBufferStorage(buffer.handle, requested_size, nullptr, |
| 450 | return; | 610 | storage_flags | GL_MAP_PERSISTENT_BIT); |
| 611 | maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size, | ||
| 612 | map_flags | GL_MAP_PERSISTENT_BIT))); | ||
| 613 | |||
| 614 | syncs.emplace_back(); | ||
| 615 | sizes.push_back(requested_size); | ||
| 616 | |||
| 617 | ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && | ||
| 618 | maps.size() == sizes.size()); | ||
| 619 | |||
| 620 | return buffers.size() - 1; | ||
| 621 | } | ||
| 622 | |||
| 623 | std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) { | ||
| 624 | size_t smallest_buffer = std::numeric_limits<size_t>::max(); | ||
| 625 | std::optional<size_t> found; | ||
| 626 | const size_t num_buffers = sizes.size(); | ||
| 627 | for (size_t index = 0; index < num_buffers; ++index) { | ||
| 628 | const size_t buffer_size = sizes[index]; | ||
| 629 | if (buffer_size < requested_size || buffer_size >= smallest_buffer) { | ||
| 630 | continue; | ||
| 631 | } | ||
| 632 | if (syncs[index].handle != 0) { | ||
| 633 | GLint status; | ||
| 634 | glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status); | ||
| 635 | if (status != GL_SIGNALED) { | ||
| 636 | continue; | ||
| 637 | } | ||
| 638 | syncs[index].Release(); | ||
| 639 | } | ||
| 640 | smallest_buffer = buffer_size; | ||
| 641 | found = index; | ||
| 451 | } | 642 | } |
| 643 | return found; | ||
| 644 | } | ||
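Reviewer note: FindBuffer only recycles a mapping once its fence has signaled, which is what makes persistent mapping safe without stalls. The full lifecycle as a self-contained sketch (GL 4.4+ context assumed; u8 is the repository's byte alias):

    // Create a persistently mapped staging buffer once...
    GLuint staging = 0;
    glCreateBuffers(1, &staging);
    glNamedBufferStorage(staging, size, nullptr,
                         GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
    u8* map = static_cast<u8*>(glMapNamedBufferRange(
        staging, 0, size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT));
    // ...fill it, submit the GPU copy, then fence the work:
    GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
    // Before the CPU may write to 'map' again:
    GLint status = 0;
    glGetSynciv(fence, GL_SYNC_STATUS, 1, nullptr, &status);
    const bool safe_to_reuse = status == GL_SIGNALED;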
| 452 | 645 | ||
| 453 | const GLenum view_target = surface.GetTarget(); | 646 | Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, |
| 454 | const GLuint texture = surface.GetTexture(); | 647 | VAddr cpu_addr_) |
| 455 | switch (surface.GetSurfaceParams().target) { | 648 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) { |
| 456 | case SurfaceTarget::Texture1D: | 649 | if (CanBeAccelerated(runtime, info)) { |
| 457 | glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level); | 650 | flags |= ImageFlagBits::AcceleratedUpload; |
| 651 | } | ||
| 652 | if (IsConverted(runtime.device, info.format, info.type)) { | ||
| 653 | flags |= ImageFlagBits::Converted; | ||
| 654 | gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; | ||
| 655 | gl_format = GL_RGBA; | ||
| 656 | gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; | ||
| 657 | } else { | ||
| 658 | const auto& tuple = GetFormatTuple(info.format); | ||
| 659 | gl_internal_format = tuple.internal_format; | ||
| 660 | gl_format = tuple.format; | ||
| 661 | gl_type = tuple.type; | ||
| 662 | } | ||
| 663 | const GLenum target = ImageTarget(info); | ||
| 664 | const GLsizei width = info.size.width; | ||
| 665 | const GLsizei height = info.size.height; | ||
| 666 | const GLsizei depth = info.size.depth; | ||
| 667 | const int max_host_mip_levels = std::bit_width(info.size.width); | ||
| 668 | const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); | ||
| 669 | const GLsizei num_layers = info.resources.layers; | ||
| 670 | const GLsizei num_samples = info.num_samples; | ||
| 671 | |||
| 672 | GLuint handle = 0; | ||
| 673 | if (target != GL_TEXTURE_BUFFER) { | ||
| 674 | texture.Create(target); | ||
| 675 | handle = texture.handle; | ||
| 676 | } | ||
| 677 | switch (target) { | ||
| 678 | case GL_TEXTURE_1D_ARRAY: | ||
| 679 | glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); | ||
| 458 | break; | 680 | break; |
| 459 | case SurfaceTarget::Texture2D: | 681 | case GL_TEXTURE_2D_ARRAY: |
| 460 | glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level); | 682 | glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); |
| 461 | break; | 683 | break; |
| 462 | case SurfaceTarget::Texture1DArray: | 684 | case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { |
| 463 | case SurfaceTarget::Texture2DArray: | 685 | // TODO: Where should 'fixedsamplelocations' come from? |
| 464 | case SurfaceTarget::TextureCubemap: | 686 | const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); |
| 465 | case SurfaceTarget::TextureCubeArray: | 687 | glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x, |
| 466 | glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level, | 688 | height >> samples_y, num_layers, GL_FALSE); |
| 467 | params.base_layer); | 689 | break; |
| 690 | } | ||
| 691 | case GL_TEXTURE_RECTANGLE: | ||
| 692 | glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); | ||
| 693 | break; | ||
| 694 | case GL_TEXTURE_3D: | ||
| 695 | glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); | ||
| 696 | break; | ||
| 697 | case GL_TEXTURE_BUFFER: | ||
| 698 | buffer.Create(); | ||
| 699 | glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); | ||
| 468 | break; | 700 | break; |
| 469 | default: | 701 | default: |
| 470 | UNIMPLEMENTED(); | 702 | UNREACHABLE_MSG("Invalid target=0x{:x}", target); |
| 703 | break; | ||
| 704 | } | ||
| 705 | if (runtime.device.HasDebuggingToolAttached()) { | ||
| 706 | const std::string name = VideoCommon::Name(*this); | ||
| 707 | glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle, | ||
| 708 | static_cast<GLsizei>(name.size()), name.data()); | ||
| 471 | } | 709 | } |
| 472 | } | 710 | } |
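Reviewer note: the num_levels clamp uses C++20 std::bit_width as a mip count: a width of 2^n supports n+1 levels down to 1x1, which guards against guest images declaring more levels than the host size allows. Worked check:

    #include <bit>
    static_assert(std::bit_width(1024u) == 11); // 1024, 512, ..., 1 -> 11 levels
    static_assert(std::bit_width(1u) == 1);     // a 1-wide image has one level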
| 473 | 711 | ||
| 474 | GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source, | 712 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 475 | SwizzleSource z_source, SwizzleSource w_source) { | 713 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 476 | if (GetSurfaceParams().IsBuffer()) { | 714 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle()); |
| 477 | return GetTexture(); | 715 | glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); |
| 478 | } | ||
| 479 | const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); | ||
| 480 | if (current_swizzle == new_swizzle) { | ||
| 481 | return current_view; | ||
| 482 | } | ||
| 483 | current_swizzle = new_swizzle; | ||
| 484 | 716 | ||
| 485 | const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); | 717 | glPixelStorei(GL_UNPACK_ALIGNMENT, 1); |
| 486 | OGLTextureView& view = entry->second; | ||
| 487 | if (!is_cache_miss) { | ||
| 488 | current_view = view.handle; | ||
| 489 | return view.handle; | ||
| 490 | } | ||
| 491 | view = CreateTextureView(); | ||
| 492 | current_view = view.handle; | ||
| 493 | 718 | ||
| 494 | std::array swizzle{x_source, y_source, z_source, w_source}; | 719 | u32 current_row_length = std::numeric_limits<u32>::max(); |
| 720 | u32 current_image_height = std::numeric_limits<u32>::max(); | ||
| 495 | 721 | ||
| 496 | switch (const PixelFormat pixel_format = GetSurfaceParams().pixel_format) { | 722 | for (const VideoCommon::BufferImageCopy& copy : copies) { |
| 497 | case PixelFormat::D24_UNORM_S8_UINT: | 723 | if (current_row_length != copy.buffer_row_length) { |
| 498 | case PixelFormat::D32_FLOAT_S8_UINT: | 724 | current_row_length = copy.buffer_row_length; |
| 499 | case PixelFormat::S8_UINT_D24_UNORM: | 725 | glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length); |
| 500 | UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); | 726 | } |
| 501 | glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, | 727 | if (current_image_height != copy.buffer_image_height) { |
| 502 | GetComponent(pixel_format, x_source == SwizzleSource::R)); | 728 | current_image_height = copy.buffer_image_height; |
| 503 | 729 | glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); | |
| 504 | // Make sure we sample the first component | 730 | } |
| 505 | std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) { | 731 | CopyBufferToImage(copy, buffer_offset); |
| 506 | return value == SwizzleSource::G ? SwizzleSource::R : value; | ||
| 507 | }); | ||
| 508 | [[fallthrough]]; | ||
| 509 | default: { | ||
| 510 | const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]), | ||
| 511 | GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])}; | ||
| 512 | glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); | ||
| 513 | break; | ||
| 514 | } | ||
| 515 | } | 732 | } |
| 516 | return view.handle; | ||
| 517 | } | 733 | } |
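Reviewer note: the row-length/image-height caching above matters because GL_UNPACK_* state is latched globally rather than passed per call. What one copy means, with hypothetical values:

    // Source rows sit 512 texels apart in the bound PBO even though this
    // upload only consumes a 128x64 window of each row.
    glPixelStorei(GL_UNPACK_ROW_LENGTH, 512);
    glTextureSubImage2D(tex, /*level=*/0, 0, 0, 128, 64,
                        GL_RGBA, GL_UNSIGNED_BYTE, /*PBO offset 0=*/nullptr);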
| 518 | 734 | ||
| 519 | OGLTextureView CachedSurfaceView::CreateTextureView() const { | 735 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 520 | OGLTextureView texture_view; | 736 | std::span<const VideoCommon::BufferCopy> copies) { |
| 521 | texture_view.Create(); | 737 | for (const VideoCommon::BufferCopy& copy : copies) { |
| 522 | 738 | glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset, | |
| 523 | if (target == GL_TEXTURE_3D) { | 739 | copy.dst_offset, copy.size); |
| 524 | glTextureView(texture_view.handle, target, surface.texture.handle, format, | ||
| 525 | params.base_level, params.num_levels, 0, 1); | ||
| 526 | } else { | ||
| 527 | glTextureView(texture_view.handle, target, surface.texture.handle, format, | ||
| 528 | params.base_level, params.num_levels, params.base_layer, params.num_layers); | ||
| 529 | } | 740 | } |
| 530 | ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); | ||
| 531 | |||
| 532 | return texture_view; | ||
| 533 | } | 741 | } |
| 534 | 742 | ||
| 535 | TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_, | 743 | void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, |
| 536 | Tegra::Engines::Maxwell3D& maxwell3d_, | 744 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 537 | Tegra::MemoryManager& gpu_memory_, const Device& device_, | 745 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API |
| 538 | StateTracker& state_tracker_) | ||
| 539 | : TextureCacheBase{rasterizer_, maxwell3d_, gpu_memory_, device_.HasASTC()}, | ||
| 540 | state_tracker{state_tracker_} { | ||
| 541 | src_framebuffer.Create(); | ||
| 542 | dst_framebuffer.Create(); | ||
| 543 | } | ||
| 544 | 746 | ||
| 545 | TextureCacheOpenGL::~TextureCacheOpenGL() = default; | 747 | glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle()); |
| 748 | glPixelStorei(GL_PACK_ALIGNMENT, 1); | ||
| 546 | 749 | ||
| 547 | Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { | 750 | u32 current_row_length = std::numeric_limits<u32>::max(); |
| 548 | return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported); | 751 | u32 current_image_height = std::numeric_limits<u32>::max(); |
| 549 | } | ||
| 550 | 752 | ||
| 551 | void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, | 753 | for (const VideoCommon::BufferImageCopy& copy : copies) { |
| 552 | const VideoCommon::CopyParams& copy_params) { | 754 | if (current_row_length != copy.buffer_row_length) { |
| 553 | const auto& src_params = src_surface->GetSurfaceParams(); | 755 | current_row_length = copy.buffer_row_length; |
| 554 | const auto& dst_params = dst_surface->GetSurfaceParams(); | 756 | glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); |
| 555 | if (src_params.type != dst_params.type) { | 757 | } |
| 556 | // A fallback is needed | 758 | if (current_image_height != copy.buffer_image_height) { |
| 557 | return; | 759 | current_image_height = copy.buffer_image_height; |
| 760 | glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); | ||
| 761 | } | ||
| 762 | CopyImageToBuffer(copy, buffer_offset); | ||
| 558 | } | 763 | } |
| 559 | const auto src_handle = src_surface->GetTexture(); | ||
| 560 | const auto src_target = src_surface->GetTarget(); | ||
| 561 | const auto dst_handle = dst_surface->GetTexture(); | ||
| 562 | const auto dst_target = dst_surface->GetTarget(); | ||
| 563 | glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x, | ||
| 564 | copy_params.source_y, copy_params.source_z, dst_handle, dst_target, | ||
| 565 | copy_params.dest_level, copy_params.dest_x, copy_params.dest_y, | ||
| 566 | copy_params.dest_z, copy_params.width, copy_params.height, | ||
| 567 | copy_params.depth); | ||
| 568 | } | 764 | } |
| 569 | 765 | ||
| 570 | void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | 766 | void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { |
| 571 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 767 | // Compressed formats don't have a pixel format or type |
| 572 | const auto& src_params{src_view->GetSurfaceParams()}; | 768 | const bool is_compressed = gl_format == GL_NONE; |
| 573 | const auto& dst_params{dst_view->GetSurfaceParams()}; | 769 | const void* const offset = reinterpret_cast<const void*>(copy.buffer_offset + buffer_offset); |
| 574 | UNIMPLEMENTED_IF(src_params.depth != 1); | ||
| 575 | UNIMPLEMENTED_IF(dst_params.depth != 1); | ||
| 576 | |||
| 577 | state_tracker.NotifyScissor0(); | ||
| 578 | state_tracker.NotifyFramebuffer(); | ||
| 579 | state_tracker.NotifyRasterizeEnable(); | ||
| 580 | state_tracker.NotifyFramebufferSRGB(); | ||
| 581 | 770 | ||
| 582 | if (dst_params.srgb_conversion) { | 771 | switch (info.type) { |
| 583 | glEnable(GL_FRAMEBUFFER_SRGB); | 772 | case ImageType::e1D: |
| 584 | } else { | 773 | if (is_compressed) { |
| 585 | glDisable(GL_FRAMEBUFFER_SRGB); | 774 | glCompressedTextureSubImage2D(texture.handle, copy.image_subresource.base_level, |
| 775 | copy.image_offset.x, copy.image_subresource.base_layer, | ||
| 776 | copy.image_extent.width, | ||
| 777 | copy.image_subresource.num_layers, gl_internal_format, | ||
| 778 | static_cast<GLsizei>(copy.buffer_size), offset); | ||
| 779 | } else { | ||
| 780 | glTextureSubImage2D(texture.handle, copy.image_subresource.base_level, | ||
| 781 | copy.image_offset.x, copy.image_subresource.base_layer, | ||
| 782 | copy.image_extent.width, copy.image_subresource.num_layers, | ||
| 783 | gl_format, gl_type, offset); | ||
| 784 | } | ||
| 785 | break; | ||
| 786 | case ImageType::e2D: | ||
| 787 | case ImageType::Linear: | ||
| 788 | if (is_compressed) { | ||
| 789 | glCompressedTextureSubImage3D( | ||
| 790 | texture.handle, copy.image_subresource.base_level, copy.image_offset.x, | ||
| 791 | copy.image_offset.y, copy.image_subresource.base_layer, copy.image_extent.width, | ||
| 792 | copy.image_extent.height, copy.image_subresource.num_layers, gl_internal_format, | ||
| 793 | static_cast<GLsizei>(copy.buffer_size), offset); | ||
| 794 | } else { | ||
| 795 | glTextureSubImage3D(texture.handle, copy.image_subresource.base_level, | ||
| 796 | copy.image_offset.x, copy.image_offset.y, | ||
| 797 | copy.image_subresource.base_layer, copy.image_extent.width, | ||
| 798 | copy.image_extent.height, copy.image_subresource.num_layers, | ||
| 799 | gl_format, gl_type, offset); | ||
| 800 | } | ||
| 801 | break; | ||
| 802 | case ImageType::e3D: | ||
| 803 | if (is_compressed) { | ||
| 804 | glCompressedTextureSubImage3D( | ||
| 805 | texture.handle, copy.image_subresource.base_level, copy.image_offset.x, | ||
| 806 | copy.image_offset.y, copy.image_offset.z, copy.image_extent.width, | ||
| 807 | copy.image_extent.height, copy.image_extent.depth, gl_internal_format, | ||
| 808 | static_cast<GLsizei>(copy.buffer_size), offset); | ||
| 809 | } else { | ||
| 810 | glTextureSubImage3D(texture.handle, copy.image_subresource.base_level, | ||
| 811 | copy.image_offset.x, copy.image_offset.y, copy.image_offset.z, | ||
| 812 | copy.image_extent.width, copy.image_extent.height, | ||
| 813 | copy.image_extent.depth, gl_format, gl_type, offset); | ||
| 814 | } | ||
| 815 | break; | ||
| 816 | default: | ||
| 817 | UNREACHABLE(); | ||
| 586 | } | 818 | } |
| 587 | glDisable(GL_RASTERIZER_DISCARD); | 819 | } |
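Reviewer note: the reinterpret_cast of the byte offset in CopyBufferToImage is the standard PBO idiom, not a stray pointer: while GL_PIXEL_UNPACK_BUFFER is bound, the final pointer parameter of glTextureSubImage*/glCompressedTextureSubImage* is read as an offset into the bound buffer rather than a client address. Equivalent spelling ('byte_offset' hypothetical):

    const void* gl_offset = reinterpret_cast<const void*>(byte_offset);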
| 588 | glDisablei(GL_SCISSOR_TEST, 0); | ||
| 589 | |||
| 590 | glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle); | ||
| 591 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle); | ||
| 592 | |||
| 593 | GLenum buffers = 0; | ||
| 594 | if (src_params.type == SurfaceType::ColorTexture) { | ||
| 595 | src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); | ||
| 596 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 597 | 0); | ||
| 598 | |||
| 599 | dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); | ||
| 600 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 601 | 0); | ||
| 602 | |||
| 603 | buffers = GL_COLOR_BUFFER_BIT; | ||
| 604 | } else if (src_params.type == SurfaceType::Depth) { | ||
| 605 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 606 | src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER); | ||
| 607 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 608 | 820 | ||
| 609 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | 821 | void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { |
| 610 | dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | 822 | const GLint x_offset = copy.image_offset.x; |
| 611 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | 823 | const GLsizei width = copy.image_extent.width; |
| 612 | 824 | ||
| 613 | buffers = GL_DEPTH_BUFFER_BIT; | 825 | const GLint level = copy.image_subresource.base_level; |
| 614 | } else if (src_params.type == SurfaceType::DepthStencil) { | 826 | const GLsizei buffer_size = static_cast<GLsizei>(copy.buffer_size); |
| 615 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | 827 | void* const offset = reinterpret_cast<void*>(copy.buffer_offset + buffer_offset); |
| 616 | src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER); | ||
| 617 | 828 | ||
| 618 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | 829 | GLint y_offset = 0; |
| 619 | dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | 830 | GLint z_offset = 0; |
| 831 | GLsizei height = 1; | ||
| 832 | GLsizei depth = 1; | ||
| 620 | 833 | ||
| 621 | buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; | 834 | switch (info.type) { |
| 835 | case ImageType::e1D: | ||
| 836 | y_offset = copy.image_subresource.base_layer; | ||
| 837 | height = copy.image_subresource.num_layers; | ||
| 838 | break; | ||
| 839 | case ImageType::e2D: | ||
| 840 | case ImageType::Linear: | ||
| 841 | y_offset = copy.image_offset.y; | ||
| 842 | z_offset = copy.image_subresource.base_layer; | ||
| 843 | height = copy.image_extent.height; | ||
| 844 | depth = copy.image_subresource.num_layers; | ||
| 845 | break; | ||
| 846 | case ImageType::e3D: | ||
| 847 | y_offset = copy.image_offset.y; | ||
| 848 | z_offset = copy.image_offset.z; | ||
| 849 | height = copy.image_extent.height; | ||
| 850 | depth = copy.image_extent.depth; | ||
| 851 | break; | ||
| 852 | default: | ||
| 853 | UNREACHABLE(); | ||
| 854 | } | ||
| 855 | // Compressed formats don't have a pixel format or type | ||
| 856 | const bool is_compressed = gl_format == GL_NONE; | ||
| 857 | if (is_compressed) { | ||
| 858 | glGetCompressedTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, | ||
| 859 | height, depth, buffer_size, offset); | ||
| 860 | } else { | ||
| 861 | glGetTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, height, | ||
| 862 | depth, gl_format, gl_type, buffer_size, offset); | ||
| 622 | } | 863 | } |
| 623 | |||
| 624 | const Common::Rectangle<u32>& src_rect = copy_config.src_rect; | ||
| 625 | const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect; | ||
| 626 | const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; | ||
| 627 | |||
| 628 | glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top), | ||
| 629 | static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom), | ||
| 630 | static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top), | ||
| 631 | static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom), | ||
| 632 | buffers, | ||
| 633 | is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST); | ||
| 634 | } | 864 | } |
| 635 | 865 | ||
| 636 | void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { | 866 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, |
| 637 | MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); | 867 | ImageId image_id_, Image& image) |
| 638 | const auto& src_params = src_surface->GetSurfaceParams(); | 868 | : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { |
| 639 | const auto& dst_params = dst_surface->GetSurfaceParams(); | 869 | const Device& device = runtime.device; |
| 640 | UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); | 870 | if (True(image.flags & ImageFlagBits::Converted)) { |
| 871 | internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; | ||
| 872 | } else { | ||
| 873 | internal_format = GetFormatTuple(format).internal_format; | ||
| 874 | } | ||
| 875 | VideoCommon::SubresourceRange flatten_range = info.range; | ||
| 876 | std::array<GLuint, 2> handles; | ||
| 877 | stored_views.reserve(2); | ||
| 641 | 878 | ||
| 642 | const auto source_format = GetFormatTuple(src_params.pixel_format); | 879 | switch (info.type) { |
| 643 | const auto dest_format = GetFormatTuple(dst_params.pixel_format); | 880 | case ImageViewType::e1DArray: |
| 881 | flatten_range.extent.layers = 1; | ||
| 882 | [[fallthrough]]; | ||
| 883 | case ImageViewType::e1D: | ||
| 884 | glGenTextures(2, handles.data()); | ||
| 885 | SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); | ||
| 886 | SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); | ||
| 887 | break; | ||
| 888 | case ImageViewType::e2DArray: | ||
| 889 | flatten_range.extent.layers = 1; | ||
| 890 | [[fallthrough]]; | ||
| 891 | case ImageViewType::e2D: | ||
| 892 | if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { | ||
| 893 | // 2D and 2D array views on a 3D texture are used exclusively for render targets | ||
| 894 | ASSERT(info.range.extent.levels == 1); | ||
| 895 | const VideoCommon::SubresourceRange slice_range{ | ||
| 896 | .base = {.level = info.range.base.level, .layer = 0}, | ||
| 897 | .extent = {.levels = 1, .layers = 1}, | ||
| 898 | }; | ||
| 899 | glGenTextures(1, handles.data()); | ||
| 900 | SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); | ||
| 901 | break; | ||
| 902 | } | ||
| 903 | glGenTextures(2, handles.data()); | ||
| 904 | SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); | ||
| 905 | SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); | ||
| 906 | break; | ||
| 907 | case ImageViewType::e3D: | ||
| 908 | glGenTextures(1, handles.data()); | ||
| 909 | SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); | ||
| 910 | break; | ||
| 911 | case ImageViewType::CubeArray: | ||
| 912 | flatten_range.extent.layers = 6; | ||
| 913 | [[fallthrough]]; | ||
| 914 | case ImageViewType::Cube: | ||
| 915 | glGenTextures(2, handles.data()); | ||
| 916 | SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); | ||
| 917 | SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); | ||
| 918 | break; | ||
| 919 | case ImageViewType::Rect: | ||
| 920 | glGenTextures(1, handles.data()); | ||
| 921 | SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); | ||
| 922 | break; | ||
| 923 | case ImageViewType::Buffer: | ||
| 924 | glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); | ||
| 925 | SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); | ||
| 926 | break; | ||
| 927 | } | ||
| 928 | default_handle = Handle(info.type); | ||
| 929 | } | ||
| 644 | 930 | ||
| 645 | const std::size_t source_size = src_surface->GetHostSizeInBytes(); | 931 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) |
| 646 | const std::size_t dest_size = dst_surface->GetHostSizeInBytes(); | 932 | : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} |
| 647 | 933 | ||
| 648 | const std::size_t buffer_size = std::max(source_size, dest_size); | 934 | void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, |
| 935 | GLuint handle, const VideoCommon::ImageViewInfo& info, | ||
| 936 | VideoCommon::SubresourceRange view_range) { | ||
| 937 | if (info.type == ImageViewType::Buffer) { | ||
| 938 | // TODO: Take offset from buffer cache | ||
| 939 | glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, | ||
| 940 | image.guest_size_bytes); | ||
| 941 | } else { | ||
| 942 | const GLuint parent = image.texture.handle; | ||
| 943 | const GLenum target = ImageTarget(view_type, image.info.num_samples); | ||
| 944 | glTextureView(handle, target, parent, internal_format, view_range.base.level, | ||
| 945 | view_range.extent.levels, view_range.base.layer, view_range.extent.layers); | ||
| 946 | if (!info.IsRenderTarget()) { | ||
| 947 | ApplySwizzle(handle, format, info.Swizzle()); | ||
| 948 | } | ||
| 949 | } | ||
| 950 | if (device.HasDebuggingToolAttached()) { | ||
| 951 | const std::string name = VideoCommon::Name(*this, view_type); | ||
| 952 | glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); | ||
| 953 | } | ||
| 954 | stored_views.emplace_back().handle = handle; | ||
| 955 | views[static_cast<size_t>(view_type)] = handle; | ||
| 956 | } | ||
| 649 | 957 | ||
| 650 | GLuint copy_pbo_handle = FetchPBO(buffer_size); | 958 | Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { |
| 959 | const GLenum compare_mode = config.depth_compare_enabled ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE; | ||
| 960 | const GLenum compare_func = MaxwellToGL::DepthCompareFunc(config.depth_compare_func); | ||
| 961 | const GLenum mag = MaxwellToGL::TextureFilterMode(config.mag_filter, TextureMipmapFilter::None); | ||
| 962 | const GLenum min = MaxwellToGL::TextureFilterMode(config.min_filter, config.mipmap_filter); | ||
| 963 | const GLenum reduction_filter = MaxwellToGL::ReductionFilter(config.reduction_filter); | ||
| 964 | const GLint seamless = config.cubemap_interface_filtering ? GL_TRUE : GL_FALSE; | ||
| 965 | |||
| 966 | UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1); | ||
| 967 | UNIMPLEMENTED_IF(config.float_coord_normalization != 0); | ||
| 968 | |||
| 969 | sampler.Create(); | ||
| 970 | const GLuint handle = sampler.handle; | ||
| 971 | glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u)); | ||
| 972 | glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v)); | ||
| 973 | glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p)); | ||
| 974 | glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode); | ||
| 975 | glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func); | ||
| 976 | glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag); | ||
| 977 | glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min); | ||
| 978 | glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias()); | ||
| 979 | glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod()); | ||
| 980 | glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod()); | ||
| 981 | glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); | ||
| 982 | |||
| 983 | if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { | ||
| 984 | glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy()); | ||
| 985 | } else { | ||
| 986 | LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); | ||
| 987 | } | ||
| 988 | if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) { | ||
| 989 | glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter); | ||
| 990 | } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) { | ||
| 991 | LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required"); | ||
| 992 | } | ||
| 993 | if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) { | ||
| 994 | glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless); | ||
| 995 | } else if (seamless == GL_FALSE) { | ||
| 996 | // We default to false because it's more common | ||
| 997 | LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required"); | ||
| 998 | } | ||
| 999 | } | ||
| 651 | 1000 | ||
| 652 | glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); | 1001 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, |
| 1002 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { | ||
| 1003 | // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of | ||
| 1004 | // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared | ||
| 1005 | // across contexts. yuzu doesn't share framebuffers across contexts, and it needs attachments | ||
| 1006 | // with mismatching sizes, which is why core framebuffers are preferred. | ||
| 1007 | GLuint handle; | ||
| 1008 | glGenFramebuffers(1, &handle); | ||
| 1009 | glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); | ||
| 1010 | |||
| 1011 | GLsizei num_buffers = 0; | ||
| 1012 | std::array<GLenum, NUM_RT> gl_draw_buffers; | ||
| 1013 | gl_draw_buffers.fill(GL_NONE); | ||
| 1014 | |||
| 1015 | for (size_t index = 0; index < color_buffers.size(); ++index) { | ||
| 1016 | const ImageView* const image_view = color_buffers[index]; | ||
| 1017 | if (!image_view) { | ||
| 1018 | continue; | ||
| 1019 | } | ||
| 1020 | buffer_bits |= GL_COLOR_BUFFER_BIT; | ||
| 1021 | gl_draw_buffers[index] = GL_COLOR_ATTACHMENT0 + key.draw_buffers[index]; | ||
| 1022 | num_buffers = static_cast<GLsizei>(index + 1); | ||
| 653 | 1023 | ||
| 654 | if (src_surface->IsCompressed()) { | 1024 | const GLenum attachment = static_cast<GLenum>(GL_COLOR_ATTACHMENT0 + index); |
| 655 | glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size), | 1025 | AttachTexture(handle, attachment, image_view); |
| 656 | nullptr); | ||
| 657 | } else { | ||
| 658 | glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type, | ||
| 659 | static_cast<GLsizei>(source_size), nullptr); | ||
| 660 | } | 1026 | } |
| 661 | glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); | ||
| 662 | 1027 | ||
| 663 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); | 1028 | if (const ImageView* const image_view = depth_buffer; image_view) { |
| 1029 | if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) { | ||
| 1030 | buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; | ||
| 1031 | } else { | ||
| 1032 | buffer_bits |= GL_DEPTH_BUFFER_BIT; | ||
| 1033 | } | ||
| 1034 | const GLenum attachment = AttachmentType(image_view->format); | ||
| 1035 | AttachTexture(handle, attachment, image_view); | ||
| 1036 | } | ||
| 664 | 1037 | ||
| 665 | const GLsizei width = static_cast<GLsizei>(dst_params.width); | 1038 | if (num_buffers > 1) { |
| 666 | const GLsizei height = static_cast<GLsizei>(dst_params.height); | 1039 | glNamedFramebufferDrawBuffers(handle, num_buffers, gl_draw_buffers.data()); |
| 667 | const GLsizei depth = static_cast<GLsizei>(dst_params.depth); | 1040 | } else if (num_buffers > 0) { |
| 668 | if (dst_surface->IsCompressed()) { | 1041 | glNamedFramebufferDrawBuffer(handle, gl_draw_buffers[0]); |
| 669 | LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); | ||
| 670 | UNREACHABLE(); | ||
| 671 | } else { | 1042 | } else { |
| 672 | switch (dst_params.target) { | 1043 | glNamedFramebufferDrawBuffer(handle, GL_NONE); |
| 673 | case SurfaceTarget::Texture1D: | ||
| 674 | glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format, | ||
| 675 | dest_format.type, nullptr); | ||
| 676 | break; | ||
| 677 | case SurfaceTarget::Texture2D: | ||
| 678 | glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height, | ||
| 679 | dest_format.format, dest_format.type, nullptr); | ||
| 680 | break; | ||
| 681 | case SurfaceTarget::Texture3D: | ||
| 682 | case SurfaceTarget::Texture2DArray: | ||
| 683 | case SurfaceTarget::TextureCubeArray: | ||
| 684 | glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, | ||
| 685 | dest_format.format, dest_format.type, nullptr); | ||
| 686 | break; | ||
| 687 | case SurfaceTarget::TextureCubemap: | ||
| 688 | glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, | ||
| 689 | dest_format.format, dest_format.type, nullptr); | ||
| 690 | break; | ||
| 691 | default: | ||
| 692 | LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", dst_params.target); | ||
| 693 | UNREACHABLE(); | ||
| 694 | } | ||
| 695 | } | 1044 | } |
| 696 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); | ||
| 697 | 1045 | ||
| 698 | glTextureBarrier(); | 1046 | glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_WIDTH, key.size.width); |
| 699 | } | 1047 | glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_HEIGHT, key.size.height); |
| 1048 | // TODO | ||
| 1049 | // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_LAYERS, ...); | ||
| 1050 | // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_SAMPLES, ...); | ||
| 1051 | // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS, ...); | ||
| 700 | 1052 | ||
| 701 | GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { | 1053 | if (runtime.device.HasDebuggingToolAttached()) { |
| 702 | ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; }); | 1054 | const std::string name = VideoCommon::Name(key); |
| 703 | const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size)); | 1055 | glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data()); |
| 704 | OGLBuffer& cp = copy_pbo_cache[l2]; | ||
| 705 | if (cp.handle == 0) { | ||
| 706 | const std::size_t ceil_size = 1ULL << l2; | ||
| 707 | cp.Create(); | ||
| 708 | cp.MakeStreamCopy(ceil_size); | ||
| 709 | } | 1056 | } |
| 710 | return cp.handle; | 1057 | framebuffer.handle = handle; |
| 711 | } | 1058 | } |
| 712 | 1059 | ||
| 713 | } // namespace OpenGL | 1060 | } // namespace OpenGL |
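A note on the CopyBufferToImage dispatch above: OpenGL has no dedicated 1D-array upload entry point, so the e1D case folds array layers into the next spatial dimension (base_layer rides in the y-offset slot and num_layers in the height slot; e2D does the same through the 3D entry point). A minimal sketch of that folding, with a hypothetical function name and placeholder handle, extents, and format rather than yuzu's real state:

    #include <glad/glad.h>

    // Sketch only: uploads one mip of a 1D-array texture through the 2D
    // DSA entry point, exactly as the e1D case in the patch does.
    void Upload1DArrayMip(GLuint texture, const void* pixels) {
        const GLint level = 0;
        const GLint x_offset = 0;      // copy.image_offset.x
        const GLint base_layer = 0;    // copy.image_subresource.base_layer
        const GLsizei width = 256;     // copy.image_extent.width
        const GLsizei num_layers = 6;  // copy.image_subresource.num_layers
        glTextureSubImage2D(texture, level, x_offset, base_layer, width,
                            num_layers, GL_RGBA, GL_UNSIGNED_BYTE, pixels);
    }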
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 72b284fab..15b7c3676 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -4,157 +4,251 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <functional> | ||
| 9 | #include <memory> | 7 | #include <memory> |
| 10 | #include <unordered_map> | 8 | #include <span> |
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | 9 | ||
| 14 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 15 | 11 | ||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "video_core/engines/shader_bytecode.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | #include "video_core/renderer_opengl/util_shaders.h" | ||
| 20 | #include "video_core/texture_cache/texture_cache.h" | 14 | #include "video_core/texture_cache/texture_cache.h" |
| 21 | 15 | ||
| 22 | namespace OpenGL { | 16 | namespace OpenGL { |
| 23 | 17 | ||
| 24 | using VideoCommon::SurfaceParams; | 18 | class Device; |
| 25 | using VideoCommon::ViewParams; | 19 | class ProgramManager; |
| 26 | |||
| 27 | class CachedSurfaceView; | ||
| 28 | class CachedSurface; | ||
| 29 | class TextureCacheOpenGL; | ||
| 30 | class StateTracker; | 20 | class StateTracker; |
| 31 | 21 | ||
| 32 | using Surface = std::shared_ptr<CachedSurface>; | 22 | class Framebuffer; |
| 33 | using View = std::shared_ptr<CachedSurfaceView>; | 23 | class Image; |
| 34 | using TextureCacheBase = VideoCommon::TextureCache<Surface, View>; | 24 | class ImageView; |
| 25 | class Sampler; | ||
| 35 | 26 | ||
| 36 | class CachedSurface final : public VideoCommon::SurfaceBase<View> { | 27 | using VideoCommon::ImageId; |
| 37 | friend CachedSurfaceView; | 28 | using VideoCommon::ImageViewId; |
| 29 | using VideoCommon::ImageViewType; | ||
| 30 | using VideoCommon::NUM_RT; | ||
| 31 | using VideoCommon::Offset2D; | ||
| 32 | using VideoCommon::RenderTargets; | ||
| 38 | 33 | ||
| 34 | class ImageBufferMap { | ||
| 39 | public: | 35 | public: |
| 40 | explicit CachedSurface(GPUVAddr gpu_addr_, const SurfaceParams& params_, | 36 | explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync); |
| 41 | bool is_astc_supported_); | 37 | ~ImageBufferMap(); |
| 42 | ~CachedSurface(); | ||
| 43 | |||
| 44 | void UploadTexture(const std::vector<u8>& staging_buffer) override; | ||
| 45 | void DownloadTexture(std::vector<u8>& staging_buffer) override; | ||
| 46 | 38 | ||
| 47 | GLenum GetTarget() const { | 39 | GLuint Handle() const noexcept { |
| 48 | return target; | 40 | return handle; |
| 49 | } | 41 | } |
| 50 | 42 | ||
| 51 | GLuint GetTexture() const { | 43 | std::span<u8> Span() const noexcept { |
| 52 | return texture.handle; | 44 | return span; |
| 53 | } | 45 | } |
| 54 | 46 | ||
| 55 | bool IsCompressed() const { | 47 | private: |
| 56 | return is_compressed; | 48 | std::span<u8> span; |
| 49 | OGLSync* sync; | ||
| 50 | GLuint handle; | ||
| 51 | }; | ||
| 52 | |||
| 53 | struct FormatProperties { | ||
| 54 | GLenum compatibility_class; | ||
| 55 | bool compatibility_by_size; | ||
| 56 | bool is_compressed; | ||
| 57 | }; | ||
| 58 | |||
| 59 | class TextureCacheRuntime { | ||
| 60 | friend Framebuffer; | ||
| 61 | friend Image; | ||
| 62 | friend ImageView; | ||
| 63 | friend Sampler; | ||
| 64 | |||
| 65 | public: | ||
| 66 | explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, | ||
| 67 | StateTracker& state_tracker); | ||
| 68 | ~TextureCacheRuntime(); | ||
| 69 | |||
| 70 | void Finish(); | ||
| 71 | |||
| 72 | ImageBufferMap MapUploadBuffer(size_t size); | ||
| 73 | |||
| 74 | ImageBufferMap MapDownloadBuffer(size_t size); | ||
| 75 | |||
| 76 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||
| 77 | |||
| 78 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { | ||
| 79 | UNIMPLEMENTED(); | ||
| 57 | } | 80 | } |
| 58 | 81 | ||
| 59 | protected: | 82 | bool CanImageBeCopied(const Image& dst, const Image& src); |
| 60 | void DecorateSurfaceName() override; | 83 | |
| 84 | void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||
| 85 | |||
| 86 | void BlitFramebuffer(Framebuffer* dst, Framebuffer* src, | ||
| 87 | const std::array<Offset2D, 2>& dst_region, | ||
| 88 | const std::array<Offset2D, 2>& src_region, | ||
| 89 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 90 | Tegra::Engines::Fermi2D::Operation operation); | ||
| 91 | |||
| 92 | void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||
| 93 | std::span<const VideoCommon::SwizzleParameters> swizzles); | ||
| 61 | 94 | ||
| 62 | View CreateView(const ViewParams& view_key) override; | 95 | void InsertUploadMemoryBarrier(); |
| 63 | View CreateViewInner(const ViewParams& view_key, bool is_proxy); | 96 | |
| 97 | FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; | ||
| 98 | |||
| 99 | bool HasBrokenTextureViewFormats() const noexcept { | ||
| 100 | return has_broken_texture_view_formats; | ||
| 101 | } | ||
| 64 | 102 | ||
| 65 | private: | 103 | private: |
| 66 | void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer); | 104 | struct StagingBuffers { |
| 105 | explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); | ||
| 106 | ~StagingBuffers(); | ||
| 67 | 107 | ||
| 68 | GLenum internal_format{}; | 108 | ImageBufferMap RequestMap(size_t requested_size, bool insert_fence); |
| 69 | GLenum format{}; | ||
| 70 | GLenum type{}; | ||
| 71 | bool is_compressed{}; | ||
| 72 | GLenum target{}; | ||
| 73 | u32 view_count{}; | ||
| 74 | 109 | ||
| 75 | OGLTexture texture; | 110 | size_t RequestBuffer(size_t requested_size); |
| 76 | OGLBuffer texture_buffer; | 111 | |
| 112 | std::optional<size_t> FindBuffer(size_t requested_size); | ||
| 113 | |||
| 114 | std::vector<OGLSync> syncs; | ||
| 115 | std::vector<OGLBuffer> buffers; | ||
| 116 | std::vector<u8*> maps; | ||
| 117 | std::vector<size_t> sizes; | ||
| 118 | GLenum storage_flags; | ||
| 119 | GLenum map_flags; | ||
| 120 | }; | ||
| 121 | |||
| 122 | const Device& device; | ||
| 123 | StateTracker& state_tracker; | ||
| 124 | UtilShaders util_shaders; | ||
| 125 | |||
| 126 | std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; | ||
| 127 | bool has_broken_texture_view_formats = false; | ||
| 128 | |||
| 129 | StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; | ||
| 130 | StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT}; | ||
| 131 | |||
| 132 | OGLTexture null_image_1d_array; | ||
| 133 | OGLTexture null_image_cube_array; | ||
| 134 | OGLTexture null_image_3d; | ||
| 135 | OGLTexture null_image_rect; | ||
| 136 | OGLTextureView null_image_view_1d; | ||
| 137 | OGLTextureView null_image_view_2d; | ||
| 138 | OGLTextureView null_image_view_2d_array; | ||
| 139 | OGLTextureView null_image_view_cube; | ||
| 140 | |||
| 141 | std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views; | ||
| 77 | }; | 142 | }; |
| 78 | 143 | ||
| 79 | class CachedSurfaceView final : public VideoCommon::ViewBase { | 144 | class Image : public VideoCommon::ImageBase { |
| 145 | friend ImageView; | ||
| 146 | |||
| 80 | public: | 147 | public: |
| 81 | explicit CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, bool is_proxy_); | 148 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, |
| 82 | ~CachedSurfaceView(); | 149 | VAddr cpu_addr); |
| 83 | 150 | ||
| 84 | /// @brief Attaches this texture view to the currently bound fb_target framebuffer | 151 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 85 | /// @param attachment Attachment to bind textures to | 152 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 86 | /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER) | ||
| 87 | void Attach(GLenum attachment, GLenum fb_target) const; | ||
| 88 | 153 | ||
| 89 | GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, | 154 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 90 | Tegra::Texture::SwizzleSource y_source, | 155 | std::span<const VideoCommon::BufferCopy> copies); |
| 91 | Tegra::Texture::SwizzleSource z_source, | ||
| 92 | Tegra::Texture::SwizzleSource w_source); | ||
| 93 | 156 | ||
| 94 | void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix); | 157 | void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, |
| 158 | std::span<const VideoCommon::BufferImageCopy> copies); | ||
| 95 | 159 | ||
| 96 | void MarkAsModified(u64 tick) { | 160 | GLuint Handle() const noexcept { |
| 97 | surface.MarkAsModified(true, tick); | 161 | return texture.handle; |
| 98 | } | 162 | } |
| 99 | 163 | ||
| 100 | GLuint GetTexture() const { | 164 | private: |
| 101 | if (is_proxy) { | 165 | void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); |
| 102 | return surface.GetTexture(); | 166 | |
| 103 | } | 167 | void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); |
| 104 | return main_view.handle; | 168 | |
| 169 | OGLTexture texture; | ||
| 170 | OGLTextureView store_view; | ||
| 171 | OGLBuffer buffer; | ||
| 172 | GLenum gl_internal_format = GL_NONE; | ||
| 173 | GLenum gl_format = GL_NONE; | ||
| 174 | GLenum gl_type = GL_NONE; | ||
| 175 | }; | ||
| 176 | |||
| 177 | class ImageView : public VideoCommon::ImageViewBase { | ||
| 178 | friend Image; | ||
| 179 | |||
| 180 | public: | ||
| 181 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); | ||
| 182 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); | ||
| 183 | |||
| 184 | [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { | ||
| 185 | return views[static_cast<size_t>(query_type)]; | ||
| 105 | } | 186 | } |
| 106 | 187 | ||
| 107 | GLenum GetFormat() const { | 188 | [[nodiscard]] GLuint DefaultHandle() const noexcept { |
| 108 | return format; | 189 | return default_handle; |
| 109 | } | 190 | } |
| 110 | 191 | ||
| 111 | const SurfaceParams& GetSurfaceParams() const { | 192 | [[nodiscard]] GLenum Format() const noexcept { |
| 112 | return surface.GetSurfaceParams(); | 193 | return internal_format; |
| 113 | } | 194 | } |
| 114 | 195 | ||
| 115 | private: | 196 | private: |
| 116 | OGLTextureView CreateTextureView() const; | 197 | void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, |
| 198 | const VideoCommon::ImageViewInfo& info, | ||
| 199 | VideoCommon::SubresourceRange view_range); | ||
| 200 | |||
| 201 | std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{}; | ||
| 202 | std::vector<OGLTextureView> stored_views; | ||
| 203 | GLuint default_handle = 0; | ||
| 204 | GLenum internal_format = GL_NONE; | ||
| 205 | }; | ||
| 206 | |||
| 207 | class ImageAlloc : public VideoCommon::ImageAllocBase {}; | ||
| 117 | 208 | ||
| 118 | CachedSurface& surface; | 209 | class Sampler { |
| 119 | const GLenum format; | 210 | public: |
| 120 | const GLenum target; | 211 | explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); |
| 121 | const bool is_proxy; | ||
| 122 | 212 | ||
| 123 | std::unordered_map<u32, OGLTextureView> view_cache; | 213 | GLuint Handle() const noexcept { |
| 124 | OGLTextureView main_view; | 214 | return sampler.handle; |
| 215 | } | ||
| 125 | 216 | ||
| 126 | // Use an invalid default so it always fails the comparison test | 217 | private: |
| 127 | u32 current_swizzle = 0xffffffff; | 218 | OGLSampler sampler; |
| 128 | GLuint current_view = 0; | ||
| 129 | }; | 219 | }; |
| 130 | 220 | ||
| 131 | class TextureCacheOpenGL final : public TextureCacheBase { | 221 | class Framebuffer { |
| 132 | public: | 222 | public: |
| 133 | explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_, | 223 | explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers, |
| 134 | Tegra::Engines::Maxwell3D& maxwell3d_, | 224 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key); |
| 135 | Tegra::MemoryManager& gpu_memory_, const Device& device_, | ||
| 136 | StateTracker& state_tracker); | ||
| 137 | ~TextureCacheOpenGL(); | ||
| 138 | |||
| 139 | protected: | ||
| 140 | Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; | ||
| 141 | |||
| 142 | void ImageCopy(Surface& src_surface, Surface& dst_surface, | ||
| 143 | const VideoCommon::CopyParams& copy_params) override; | ||
| 144 | 225 | ||
| 145 | void ImageBlit(View& src_view, View& dst_view, | 226 | [[nodiscard]] GLuint Handle() const noexcept { |
| 146 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 227 | return framebuffer.handle; |
| 228 | } | ||
| 147 | 229 | ||
| 148 | void BufferCopy(Surface& src_surface, Surface& dst_surface) override; | 230 | [[nodiscard]] GLbitfield BufferBits() const noexcept { |
| 231 | return buffer_bits; | ||
| 232 | } | ||
| 149 | 233 | ||
| 150 | private: | 234 | private: |
| 151 | GLuint FetchPBO(std::size_t buffer_size); | 235 | OGLFramebuffer framebuffer; |
| 152 | 236 | GLbitfield buffer_bits = GL_NONE; | |
| 153 | StateTracker& state_tracker; | 237 | }; |
| 154 | 238 | ||
| 155 | OGLFramebuffer src_framebuffer; | 239 | struct TextureCacheParams { |
| 156 | OGLFramebuffer dst_framebuffer; | 240 | static constexpr bool ENABLE_VALIDATION = true; |
| 157 | std::unordered_map<u32, OGLBuffer> copy_pbo_cache; | 241 | static constexpr bool FRAMEBUFFER_BLITS = true; |
| 242 | static constexpr bool HAS_EMULATED_COPIES = true; | ||
| 243 | |||
| 244 | using Runtime = OpenGL::TextureCacheRuntime; | ||
| 245 | using Image = OpenGL::Image; | ||
| 246 | using ImageAlloc = OpenGL::ImageAlloc; | ||
| 247 | using ImageView = OpenGL::ImageView; | ||
| 248 | using Sampler = OpenGL::Sampler; | ||
| 249 | using Framebuffer = OpenGL::Framebuffer; | ||
| 158 | }; | 250 | }; |
| 159 | 251 | ||
| 252 | using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; | ||
| 253 | |||
| 160 | } // namespace OpenGL | 254 | } // namespace OpenGL |
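TextureCacheParams above is a traits bundle consumed by the shared VideoCommon::TextureCache template; the generic cache pulls every backend type and feature flag from it. A minimal sketch of the pattern, not the real template in video_core/texture_cache/texture_cache.h:

    // Generic caching logic is written only against these names, so the
    // OpenGL and Vulkan backends can share one implementation.
    template <class P>
    class TextureCache {
        using Runtime = typename P::Runtime;
        using Image = typename P::Image;
        using ImageView = typename P::ImageView;
        using Sampler = typename P::Sampler;
        using Framebuffer = typename P::Framebuffer;
        static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
        // ... cache bookkeeping built on the aliases above ...
    };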
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index dd4ee3361..cbccfdeb4 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h | |||
| @@ -475,6 +475,19 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) { | |||
| 475 | return GL_FILL; | 475 | return GL_FILL; |
| 476 | } | 476 | } |
| 477 | 477 | ||
| 478 | inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) { | ||
| 479 | switch (filter) { | ||
| 480 | case Tegra::Texture::SamplerReduction::WeightedAverage: | ||
| 481 | return GL_WEIGHTED_AVERAGE_ARB; | ||
| 482 | case Tegra::Texture::SamplerReduction::Min: | ||
| 483 | return GL_MIN; | ||
| 484 | case Tegra::Texture::SamplerReduction::Max: | ||
| 485 | return GL_MAX; | ||
| 486 | } | ||
| 487 | UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter)); | ||
| 488 | return GL_WEIGHTED_AVERAGE_ARB; | ||
| 489 | } | ||
| 490 | |||
| 478 | inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) { | 491 | inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) { |
| 479 | // Enumeration order matches register order. We can convert it arithmetically. | 492 | // Enumeration order matches register order. We can convert it arithmetically. |
| 480 | return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle); | 493 | return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle); |
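ReductionFilter needs explicit cases, but ViewportSwizzle can rely on token layout: the NV_viewport_swizzle enumerants are one consecutive run (0x9350 through 0x9357) in the same order as Maxwell's register encoding. Two illustrative compile-time checks of that assumption, not part of the patch:

    #include <glad/glad.h>

    static_assert(GL_VIEWPORT_SWIZZLE_NEGATIVE_X_NV ==
                  GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + 1);
    static_assert(GL_VIEWPORT_SWIZZLE_NEGATIVE_W_NV ==
                  GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + 7);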
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index cbfaaa99c..dd77a543c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -23,10 +23,10 @@ | |||
| 23 | #include "core/telemetry_session.h" | 23 | #include "core/telemetry_session.h" |
| 24 | #include "video_core/host_shaders/opengl_present_frag.h" | 24 | #include "video_core/host_shaders/opengl_present_frag.h" |
| 25 | #include "video_core/host_shaders/opengl_present_vert.h" | 25 | #include "video_core/host_shaders/opengl_present_vert.h" |
| 26 | #include "video_core/morton.h" | ||
| 27 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 26 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 28 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 27 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 29 | #include "video_core/renderer_opengl/renderer_opengl.h" | 28 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 29 | #include "video_core/textures/decoders.h" | ||
| 30 | 30 | ||
| 31 | namespace OpenGL { | 31 | namespace OpenGL { |
| 32 | 32 | ||
| @@ -140,11 +140,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 140 | if (!framebuffer) { | 140 | if (!framebuffer) { |
| 141 | return; | 141 | return; |
| 142 | } | 142 | } |
| 143 | |||
| 144 | PrepareRendertarget(framebuffer); | 143 | PrepareRendertarget(framebuffer); |
| 145 | RenderScreenshot(); | 144 | RenderScreenshot(); |
| 146 | 145 | ||
| 147 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); | 146 | state_tracker.BindFramebuffer(0); |
| 148 | DrawScreen(emu_window.GetFramebufferLayout()); | 147 | DrawScreen(emu_window.GetFramebufferLayout()); |
| 149 | 148 | ||
| 150 | ++m_current_frame; | 149 | ++m_current_frame; |
| @@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf | |||
| 187 | // Reset the screen info's display texture to its own permanent texture | 186 | // Reset the screen info's display texture to its own permanent texture |
| 188 | screen_info.display_texture = screen_info.texture.resource.handle; | 187 | screen_info.display_texture = screen_info.texture.resource.handle; |
| 189 | 188 | ||
| 190 | const auto pixel_format{ | ||
| 191 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; | ||
| 192 | const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; | ||
| 193 | const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; | ||
| 194 | u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; | ||
| 195 | rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes); | ||
| 196 | |||
| 197 | // TODO(Rodrigo): Read this from HLE | 189 | // TODO(Rodrigo): Read this from HLE |
| 198 | constexpr u32 block_height_log2 = 4; | 190 | constexpr u32 block_height_log2 = 4; |
| 199 | VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, | 191 | const auto pixel_format{ |
| 200 | framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, | 192 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; |
| 201 | gl_framebuffer_data.data(), host_ptr); | 193 | const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; |
| 202 | 194 | const u64 size_in_bytes{Tegra::Texture::CalculateSize( | |
| 195 | true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; | ||
| 196 | const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; | ||
| 197 | const std::span<const u8> input_data(host_ptr, size_in_bytes); | ||
| 198 | Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, | ||
| 199 | framebuffer.width, framebuffer.height, 1, block_height_log2, | ||
| 200 | 0); | ||
| 201 | |||
| 202 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); | ||
| 203 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); | 203 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); |
| 204 | 204 | ||
| 205 | // Update existing texture | 205 | // Update existing texture |
| @@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 238 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); | 238 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); |
| 239 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); | 239 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); |
| 240 | 240 | ||
| 241 | // Generate presentation sampler | ||
| 242 | present_sampler.Create(); | ||
| 243 | glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | ||
| 244 | |||
| 241 | // Generate VBO handle for drawing | 245 | // Generate VBO handle for drawing |
| 242 | vertex_buffer.Create(); | 246 | vertex_buffer.Create(); |
| 243 | 247 | ||
| @@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 255 | // Clear screen to black | 259 | // Clear screen to black |
| 256 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); | 260 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); |
| 257 | 261 | ||
| 262 | // Enable seamless cubemaps when per-texture parameters are not available | ||
| 263 | if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { | ||
| 264 | glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); | ||
| 265 | } | ||
| 266 | |||
| 258 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it | 267 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it |
| 259 | if (device.HasVertexBufferUnifiedMemory()) { | 268 | if (device.HasVertexBufferUnifiedMemory()) { |
| 260 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); | 269 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); |
| @@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | |||
| 296 | 305 | ||
| 297 | const auto pixel_format{ | 306 | const auto pixel_format{ |
| 298 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; | 307 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; |
| 299 | const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; | 308 | const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; |
| 300 | gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel); | 309 | gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel); |
| 301 | 310 | ||
| 302 | GLint internal_format; | 311 | GLint internal_format; |
| @@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | |||
| 315 | internal_format = GL_RGBA8; | 324 | internal_format = GL_RGBA8; |
| 316 | texture.gl_format = GL_RGBA; | 325 | texture.gl_format = GL_RGBA; |
| 317 | texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; | 326 | texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; |
| 318 | UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", | 327 | // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", |
| 319 | static_cast<u32>(framebuffer.pixel_format)); | 328 | // static_cast<u32>(framebuffer.pixel_format)); |
| 320 | } | 329 | } |
| 321 | 330 | ||
| 322 | texture.resource.Release(); | 331 | texture.resource.Release(); |
| @@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 382 | state_tracker.NotifyPolygonModes(); | 391 | state_tracker.NotifyPolygonModes(); |
| 383 | state_tracker.NotifyViewport0(); | 392 | state_tracker.NotifyViewport0(); |
| 384 | state_tracker.NotifyScissor0(); | 393 | state_tracker.NotifyScissor0(); |
| 385 | state_tracker.NotifyColorMask0(); | 394 | state_tracker.NotifyColorMask(0); |
| 386 | state_tracker.NotifyBlend0(); | 395 | state_tracker.NotifyBlend0(); |
| 387 | state_tracker.NotifyFramebuffer(); | 396 | state_tracker.NotifyFramebuffer(); |
| 388 | state_tracker.NotifyFrontFace(); | 397 | state_tracker.NotifyFrontFace(); |
| @@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 440 | } | 449 | } |
| 441 | 450 | ||
| 442 | glBindTextureUnit(0, screen_info.display_texture); | 451 | glBindTextureUnit(0, screen_info.display_texture); |
| 443 | glBindSampler(0, 0); | 452 | glBindSampler(0, present_sampler.handle); |
| 444 | 453 | ||
| 445 | glClear(GL_COLOR_BUFFER_BIT); | 454 | glClear(GL_COLOR_BUFFER_BIT); |
| 446 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | 455 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); |
| @@ -473,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() { | |||
| 473 | 482 | ||
| 474 | DrawScreen(layout); | 483 | DrawScreen(layout); |
| 475 | 484 | ||
| 485 | glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); | ||
| 486 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); | ||
| 476 | glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, | 487 | glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, |
| 477 | renderer_settings.screenshot_bits); | 488 | renderer_settings.screenshot_bits); |
| 478 | 489 | ||
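On the LoadFBToScreenInfo change above: the guest framebuffer is block-linear, so the unswizzle input size must be the swizzled size rather than stride * height * bytes_per_pixel. A rough sketch of that size, assuming the standard Tegra GOB of 64 bytes by 8 rows and stride given in pixels (as the GL_UNPACK_ROW_LENGTH call suggests); the real Tegra::Texture::CalculateSize also accounts for depth, mip levels, and block depth:

    #include <cstdint>

    // Illustrative names only; not yuzu's implementation.
    std::uint64_t SwizzledSizeSketch(std::uint32_t stride, std::uint32_t height,
                                     std::uint32_t bytes_per_pixel,
                                     std::uint32_t block_height_log2) {
        const std::uint32_t row_bytes = stride * bytes_per_pixel;
        const std::uint32_t block_rows = 8u << block_height_log2; // GOBs are 8 rows tall
        const std::uint32_t aligned_w = (row_bytes + 63) / 64 * 64; // GOBs are 64 B wide
        const std::uint32_t aligned_h =
            (height + block_rows - 1) / block_rows * block_rows;
        return std::uint64_t{aligned_w} * aligned_h; // one layer, one level
    }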
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 376f88766..44e109794 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -102,6 +102,7 @@ private: | |||
| 102 | StateTracker state_tracker{gpu}; | 102 | StateTracker state_tracker{gpu}; |
| 103 | 103 | ||
| 104 | // OpenGL object IDs | 104 | // OpenGL object IDs |
| 105 | OGLSampler present_sampler; | ||
| 105 | OGLBuffer vertex_buffer; | 106 | OGLBuffer vertex_buffer; |
| 106 | OGLProgram vertex_program; | 107 | OGLProgram vertex_program; |
| 107 | OGLProgram fragment_program; | 108 | OGLProgram fragment_program; |
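The present_sampler member added here follows the RAII wrapper convention from gl_resource_manager.h: Create() makes the GL object on demand and the destructor releases it. A simplified sketch of that convention; the real wrapper also offers Release() and move semantics:

    #include <glad/glad.h>

    struct OGLSampler {
        GLuint handle = 0;
        void Create() {
            if (handle == 0) {
                glCreateSamplers(1, &handle);
            }
        }
        ~OGLSampler() {
            glDeleteSamplers(1, &handle); // a handle of 0 is silently ignored
        }
    };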
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp new file mode 100644 index 000000000..eb849cbf2 --- /dev/null +++ b/src/video_core/renderer_opengl/util_shaders.cpp | |||
| @@ -0,0 +1,224 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <bit> | ||
| 6 | #include <span> | ||
| 7 | #include <string_view> | ||
| 8 | |||
| 9 | #include <glad/glad.h> | ||
| 10 | |||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "common/div_ceil.h" | ||
| 14 | #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" | ||
| 15 | #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" | ||
| 16 | #include "video_core/host_shaders/opengl_copy_bc4_comp.h" | ||
| 17 | #include "video_core/host_shaders/pitch_unswizzle_comp.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 20 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 21 | #include "video_core/renderer_opengl/util_shaders.h" | ||
| 22 | #include "video_core/surface.h" | ||
| 23 | #include "video_core/texture_cache/accelerated_swizzle.h" | ||
| 24 | #include "video_core/texture_cache/types.h" | ||
| 25 | #include "video_core/texture_cache/util.h" | ||
| 26 | #include "video_core/textures/decoders.h" | ||
| 27 | |||
| 28 | namespace OpenGL { | ||
| 29 | |||
| 30 | using namespace HostShaders; | ||
| 31 | |||
| 32 | using VideoCommon::Extent3D; | ||
| 33 | using VideoCommon::ImageCopy; | ||
| 34 | using VideoCommon::ImageType; | ||
| 35 | using VideoCommon::SwizzleParameters; | ||
| 36 | using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams; | ||
| 37 | using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; | ||
| 38 | using VideoCore::Surface::BytesPerBlock; | ||
| 39 | |||
| 40 | namespace { | ||
| 41 | |||
| 42 | OGLProgram MakeProgram(std::string_view source) { | ||
| 43 | OGLShader shader; | ||
| 44 | shader.Create(source, GL_COMPUTE_SHADER); | ||
| 45 | |||
| 46 | OGLProgram program; | ||
| 47 | program.Create(true, false, shader.handle); | ||
| 48 | return program; | ||
| 49 | } | ||
| 50 | |||
| 51 | } // Anonymous namespace | ||
| 52 | |||
| 53 | UtilShaders::UtilShaders(ProgramManager& program_manager_) | ||
| 54 | : program_manager{program_manager_}, | ||
| 55 | block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), | ||
| 56 | block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), | ||
| 57 | pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), | ||
| 58 | copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { | ||
| 59 | const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); | ||
| 60 | swizzle_table_buffer.Create(); | ||
| 61 | glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); | ||
| 62 | } | ||
| 63 | |||
| 64 | UtilShaders::~UtilShaders() = default; | ||
| 65 | |||
| 66 | void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||
| 67 | std::span<const SwizzleParameters> swizzles) { | ||
| 68 | static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; | ||
| 69 | static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; | ||
| 70 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; | ||
| 71 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||
| 72 | |||
| 73 | program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); | ||
| 74 | glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | ||
| 75 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | ||
| 76 | |||
| 77 | const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); | ||
| 78 | for (const SwizzleParameters& swizzle : swizzles) { | ||
| 79 | const Extent3D num_tiles = swizzle.num_tiles; | ||
| 80 | const size_t input_offset = swizzle.buffer_offset + buffer_offset; | ||
| 81 | |||
| 82 | const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | ||
| 83 | const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | ||
| 84 | |||
| 85 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | ||
| 86 | glUniform3uiv(0, 1, params.origin.data()); | ||
| 87 | glUniform3iv(1, 1, params.destination.data()); | ||
| 88 | glUniform1ui(2, params.bytes_per_block_log2); | ||
| 89 | glUniform1ui(3, params.layer_stride); | ||
| 90 | glUniform1ui(4, params.block_size); | ||
| 91 | glUniform1ui(5, params.x_shift); | ||
| 92 | glUniform1ui(6, params.block_height); | ||
| 93 | glUniform1ui(7, params.block_height_mask); | ||
| 94 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | ||
| 95 | input_offset, image.guest_size_bytes - swizzle.buffer_offset); | ||
| 96 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, | ||
| 97 | GL_WRITE_ONLY, store_format); | ||
| 98 | glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); | ||
| 99 | } | ||
| 100 | program_manager.RestoreGuestCompute(); | ||
| 101 | } | ||
| 102 | |||
| 103 | void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||
| 104 | std::span<const SwizzleParameters> swizzles) { | ||
| 105 | static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; | ||
| 106 | |||
| 107 | static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; | ||
| 108 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; | ||
| 109 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||
| 110 | |||
| 111 | glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | ||
| 112 | program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); | ||
| 113 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | ||
| 114 | |||
| 115 | const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); | ||
| 116 | for (const SwizzleParameters& swizzle : swizzles) { | ||
| 117 | const Extent3D num_tiles = swizzle.num_tiles; | ||
| 118 | const size_t input_offset = swizzle.buffer_offset + buffer_offset; | ||
| 119 | |||
| 120 | const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | ||
| 121 | const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | ||
| 122 | const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth); | ||
| 123 | |||
| 124 | const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info); | ||
| 125 | glUniform3uiv(0, 1, params.origin.data()); | ||
| 126 | glUniform3iv(1, 1, params.destination.data()); | ||
| 127 | glUniform1ui(2, params.bytes_per_block_log2); | ||
| 128 | glUniform1ui(3, params.slice_size); | ||
| 129 | glUniform1ui(4, params.block_size); | ||
| 130 | glUniform1ui(5, params.x_shift); | ||
| 131 | glUniform1ui(6, params.block_height); | ||
| 132 | glUniform1ui(7, params.block_height_mask); | ||
| 133 | glUniform1ui(8, params.block_depth); | ||
| 134 | glUniform1ui(9, params.block_depth_mask); | ||
| 135 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | ||
| 136 | input_offset, image.guest_size_bytes - swizzle.buffer_offset); | ||
| 137 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, | ||
| 138 | GL_WRITE_ONLY, store_format); | ||
| 139 | glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); | ||
| 140 | } | ||
| 141 | program_manager.RestoreGuestCompute(); | ||
| 142 | } | ||
| 143 | |||
| 144 | void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||
| 145 | std::span<const SwizzleParameters> swizzles) { | ||
| 146 | static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; | ||
| 147 | static constexpr GLuint BINDING_INPUT_BUFFER = 0; | ||
| 148 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||
| 149 | static constexpr GLuint LOC_ORIGIN = 0; | ||
| 150 | static constexpr GLuint LOC_DESTINATION = 1; | ||
| 151 | static constexpr GLuint LOC_BYTES_PER_BLOCK = 2; | ||
| 152 | static constexpr GLuint LOC_PITCH = 3; | ||
| 153 | |||
| 154 | const u32 bytes_per_block = BytesPerBlock(image.info.format); | ||
| 155 | const GLenum format = StoreFormat(bytes_per_block); | ||
| 156 | const u32 pitch = image.info.pitch; | ||
| 157 | |||
| 158 | UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), | ||
| 159 | "Non-power of two images are not implemented"); | ||
| 160 | |||
| 161 | program_manager.BindHostCompute(pitch_unswizzle_program.handle); | ||
| 162 | glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | ||
| 163 | glUniform2ui(LOC_ORIGIN, 0, 0); | ||
| 164 | glUniform2i(LOC_DESTINATION, 0, 0); | ||
| 165 | glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); | ||
| 166 | glUniform1ui(LOC_PITCH, pitch); | ||
| 167 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); | ||
| 168 | for (const SwizzleParameters& swizzle : swizzles) { | ||
| 169 | const Extent3D num_tiles = swizzle.num_tiles; | ||
| 170 | const size_t input_offset = swizzle.buffer_offset + buffer_offset; | ||
| 171 | |||
| 172 | const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | ||
| 173 | const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | ||
| 174 | |||
| 175 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | ||
| 176 | input_offset, image.guest_size_bytes - swizzle.buffer_offset); | ||
| 177 | glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); | ||
| 178 | } | ||
| 179 | program_manager.RestoreGuestCompute(); | ||
| 180 | } | ||
| 181 | |||
| 182 | void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) { | ||
| 183 | static constexpr GLuint BINDING_INPUT_IMAGE = 0; | ||
| 184 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 1; | ||
| 185 | static constexpr GLuint LOC_SRC_OFFSET = 0; | ||
| 186 | static constexpr GLuint LOC_DST_OFFSET = 1; | ||
| 187 | |||
| 188 | program_manager.BindHostCompute(copy_bc4_program.handle); | ||
| 189 | |||
| 190 | for (const ImageCopy& copy : copies) { | ||
| 191 | ASSERT(copy.src_subresource.base_layer == 0); | ||
| 192 | ASSERT(copy.src_subresource.num_layers == 1); | ||
| 193 | ASSERT(copy.dst_subresource.base_layer == 0); | ||
| 194 | ASSERT(copy.dst_subresource.num_layers == 1); | ||
| 195 | |||
| 196 | glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); | ||
| 197 | glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); | ||
| 198 | glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level, | ||
| 199 | GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); | ||
| 200 | glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(), | ||
| 201 | copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); | ||
| 202 | glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); | ||
| 203 | } | ||
| 204 | program_manager.RestoreGuestCompute(); | ||
| 205 | } | ||
| 206 | |||
| 207 | GLenum StoreFormat(u32 bytes_per_block) { | ||
| 208 | switch (bytes_per_block) { | ||
| 209 | case 1: | ||
| 210 | return GL_R8UI; | ||
| 211 | case 2: | ||
| 212 | return GL_R16UI; | ||
| 213 | case 4: | ||
| 214 | return GL_R32UI; | ||
| 215 | case 8: | ||
| 216 | return GL_RG32UI; | ||
| 217 | case 16: | ||
| 218 | return GL_RGBA32UI; | ||
| 219 | } | ||
| 220 | UNREACHABLE(); | ||
| 221 | return GL_R8UI; | ||
| 222 | } | ||
| 223 | |||
| 224 | } // namespace OpenGL | ||
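PitchUpload above rounds each swizzle's tile grid up to whole 32x32 workgroups before dispatching, and StoreFormat maps the block size in bytes to the unsigned-integer image format whose texel is exactly one block wide (8 bytes gives GL_RG32UI, and so on), which is also why only power-of-two block sizes are handled. A minimal standalone sketch of the ceiling-division part, assuming Common::DivCeil is the usual round-up division:

#include <cstdint>

// Assumed behavior of Common::DivCeil in the dispatch math above.
constexpr std::uint32_t DivCeil(std::uint32_t numerator, std::uint32_t denominator) {
    return (numerator + denominator - 1) / denominator;
}

// A 100x37 tile grid under the 32x32x1 workgroup size dispatches a 4x2 grid,
// so the compute grid always covers the full tile extent.
static_assert(DivCeil(100, 32) == 4);
static_assert(DivCeil(37, 32) == 2);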
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
new file mode 100644
index 000000000..359997255
--- /dev/null
+++ b/src/video_core/renderer_opengl/util_shaders.h
| @@ -0,0 +1,51 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <span> | ||
| 8 | |||
| 9 | #include <glad/glad.h> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 13 | #include "video_core/texture_cache/types.h" | ||
| 14 | |||
| 15 | namespace OpenGL { | ||
| 16 | |||
| 17 | class Image; | ||
| 18 | class ImageBufferMap; | ||
| 19 | class ProgramManager; | ||
| 20 | |||
| 21 | class UtilShaders { | ||
| 22 | public: | ||
| 23 | explicit UtilShaders(ProgramManager& program_manager); | ||
| 24 | ~UtilShaders(); | ||
| 25 | |||
| 26 | void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||
| 27 | std::span<const VideoCommon::SwizzleParameters> swizzles); | ||
| 28 | |||
| 29 | void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||
| 30 | std::span<const VideoCommon::SwizzleParameters> swizzles); | ||
| 31 | |||
| 32 | void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||
| 33 | std::span<const VideoCommon::SwizzleParameters> swizzles); | ||
| 34 | |||
| 35 | void CopyBC4(Image& dst_image, Image& src_image, | ||
| 36 | std::span<const VideoCommon::ImageCopy> copies); | ||
| 37 | |||
| 38 | private: | ||
| 39 | ProgramManager& program_manager; | ||
| 40 | |||
| 41 | OGLBuffer swizzle_table_buffer; | ||
| 42 | |||
| 43 | OGLProgram block_linear_unswizzle_2d_program; | ||
| 44 | OGLProgram block_linear_unswizzle_3d_program; | ||
| 45 | OGLProgram pitch_unswizzle_program; | ||
| 46 | OGLProgram copy_bc4_program; | ||
| 47 | }; | ||
| 48 | |||
| 49 | GLenum StoreFormat(u32 bytes_per_block); | ||
| 50 | |||
| 51 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
deleted file mode 100644
index 6d7bb16b2..000000000
--- a/src/video_core/renderer_opengl/utils.cpp
+++ /dev/null
| @@ -1,42 +0,0 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include <fmt/format.h> | ||
| 9 | #include <glad/glad.h> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||
| 13 | #include "video_core/renderer_opengl/utils.h" | ||
| 14 | |||
| 15 | namespace OpenGL { | ||
| 16 | |||
| 17 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { | ||
| 18 | if (!GLAD_GL_KHR_debug) { | ||
| 19 | // We don't need to throw an error as this is just for debugging | ||
| 20 | return; | ||
| 21 | } | ||
| 22 | |||
| 23 | std::string object_label; | ||
| 24 | if (extra_info.empty()) { | ||
| 25 | switch (identifier) { | ||
| 26 | case GL_TEXTURE: | ||
| 27 | object_label = fmt::format("Texture@0x{:016X}", addr); | ||
| 28 | break; | ||
| 29 | case GL_PROGRAM: | ||
| 30 | object_label = fmt::format("Shader@0x{:016X}", addr); | ||
| 31 | break; | ||
| 32 | default: | ||
| 33 | object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr); | ||
| 34 | break; | ||
| 35 | } | ||
| 36 | } else { | ||
| 37 | object_label = fmt::format("{}@0x{:016X}", extra_info, addr); | ||
| 38 | } | ||
| 39 | glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str())); | ||
| 40 | } | ||
| 41 | |||
| 42 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
deleted file mode 100644
index 9c09ee12c..000000000
--- a/src/video_core/renderer_opengl/utils.h
+++ /dev/null
| @@ -1,16 +0,0 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string_view> | ||
| 8 | #include <vector> | ||
| 9 | #include <glad/glad.h> | ||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace OpenGL { | ||
| 13 | |||
| 14 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); | ||
| 15 | |||
| 16 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
new file mode 100644
index 000000000..1f6a169ae
--- /dev/null
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
| @@ -0,0 +1,624 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | |||
| 7 | #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" | ||
| 8 | #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" | ||
| 9 | #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" | ||
| 10 | #include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" | ||
| 11 | #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" | ||
| 12 | #include "video_core/renderer_vulkan/blit_image.h" | ||
| 13 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_shader_util.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 19 | #include "video_core/surface.h" | ||
| 20 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 21 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 22 | |||
| 23 | namespace Vulkan { | ||
| 24 | |||
| 25 | using VideoCommon::ImageViewType; | ||
| 26 | |||
| 27 | namespace { | ||
| 28 | struct PushConstants { | ||
| 29 | std::array<float, 2> tex_scale; | ||
| 30 | std::array<float, 2> tex_offset; | ||
| 31 | }; | ||
| 32 | |||
| 33 | template <u32 binding> | ||
| 34 | inline constexpr VkDescriptorSetLayoutBinding TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING{ | ||
| 35 | .binding = binding, | ||
| 36 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 37 | .descriptorCount = 1, | ||
| 38 | .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, | ||
| 39 | .pImmutableSamplers = nullptr, | ||
| 40 | }; | ||
| 41 | constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{ | ||
| 42 | TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, | ||
| 43 | TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<1>, | ||
| 44 | }; | ||
| 45 | constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ | ||
| 46 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 47 | .pNext = nullptr, | ||
| 48 | .flags = 0, | ||
| 49 | .bindingCount = 1, | ||
| 50 | .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, | ||
| 51 | }; | ||
| 52 | constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ | ||
| 53 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 54 | .pNext = nullptr, | ||
| 55 | .flags = 0, | ||
| 56 | .bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()), | ||
| 57 | .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(), | ||
| 58 | }; | ||
| 59 | constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{ | ||
| 60 | .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, | ||
| 61 | .offset = 0, | ||
| 62 | .size = sizeof(PushConstants), | ||
| 63 | }; | ||
| 64 | constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{ | ||
| 65 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 66 | .pNext = nullptr, | ||
| 67 | .flags = 0, | ||
| 68 | .vertexBindingDescriptionCount = 0, | ||
| 69 | .pVertexBindingDescriptions = nullptr, | ||
| 70 | .vertexAttributeDescriptionCount = 0, | ||
| 71 | .pVertexAttributeDescriptions = nullptr, | ||
| 72 | }; | ||
| 73 | constexpr VkPipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{ | ||
| 74 | .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 75 | .pNext = nullptr, | ||
| 76 | .flags = 0, | ||
| 77 | .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, | ||
| 78 | .primitiveRestartEnable = VK_FALSE, | ||
| 79 | }; | ||
| 80 | constexpr VkPipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{ | ||
| 81 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 82 | .pNext = nullptr, | ||
| 83 | .flags = 0, | ||
| 84 | .viewportCount = 1, | ||
| 85 | .pViewports = nullptr, | ||
| 86 | .scissorCount = 1, | ||
| 87 | .pScissors = nullptr, | ||
| 88 | }; | ||
| 89 | constexpr VkPipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{ | ||
| 90 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 91 | .pNext = nullptr, | ||
| 92 | .flags = 0, | ||
| 93 | .depthClampEnable = VK_FALSE, | ||
| 94 | .rasterizerDiscardEnable = VK_FALSE, | ||
| 95 | .polygonMode = VK_POLYGON_MODE_FILL, | ||
| 96 | .cullMode = VK_CULL_MODE_BACK_BIT, | ||
| 97 | .frontFace = VK_FRONT_FACE_CLOCKWISE, | ||
| 98 | .depthBiasEnable = VK_FALSE, | ||
| 99 | .depthBiasConstantFactor = 0.0f, | ||
| 100 | .depthBiasClamp = 0.0f, | ||
| 101 | .depthBiasSlopeFactor = 0.0f, | ||
| 102 | .lineWidth = 1.0f, | ||
| 103 | }; | ||
| 104 | constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{ | ||
| 105 | .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 106 | .pNext = nullptr, | ||
| 107 | .flags = 0, | ||
| 108 | .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, | ||
| 109 | .sampleShadingEnable = VK_FALSE, | ||
| 110 | .minSampleShading = 0.0f, | ||
| 111 | .pSampleMask = nullptr, | ||
| 112 | .alphaToCoverageEnable = VK_FALSE, | ||
| 113 | .alphaToOneEnable = VK_FALSE, | ||
| 114 | }; | ||
| 115 | constexpr std::array DYNAMIC_STATES{ | ||
| 116 | VK_DYNAMIC_STATE_VIEWPORT, | ||
| 117 | VK_DYNAMIC_STATE_SCISSOR, | ||
| 118 | }; | ||
| 119 | constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{ | ||
| 120 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 121 | .pNext = nullptr, | ||
| 122 | .flags = 0, | ||
| 123 | .dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()), | ||
| 124 | .pDynamicStates = DYNAMIC_STATES.data(), | ||
| 125 | }; | ||
| 126 | constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{ | ||
| 127 | .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | ||
| 128 | .pNext = nullptr, | ||
| 129 | .flags = 0, | ||
| 130 | .logicOpEnable = VK_FALSE, | ||
| 131 | .logicOp = VK_LOGIC_OP_CLEAR, | ||
| 132 | .attachmentCount = 0, | ||
| 133 | .pAttachments = nullptr, | ||
| 134 | .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, | ||
| 135 | }; | ||
| 136 | constexpr VkPipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{ | ||
| 137 | .blendEnable = VK_FALSE, | ||
| 138 | .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 139 | .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 140 | .colorBlendOp = VK_BLEND_OP_ADD, | ||
| 141 | .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 142 | .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 143 | .alphaBlendOp = VK_BLEND_OP_ADD, | ||
| 144 | .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | | ||
| 145 | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, | ||
| 146 | }; | ||
| 147 | constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{ | ||
| 148 | .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | ||
| 149 | .pNext = nullptr, | ||
| 150 | .flags = 0, | ||
| 151 | .logicOpEnable = VK_FALSE, | ||
| 152 | .logicOp = VK_LOGIC_OP_CLEAR, | ||
| 153 | .attachmentCount = 1, | ||
| 154 | .pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE, | ||
| 155 | .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, | ||
| 156 | }; | ||
| 157 | constexpr VkPipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{ | ||
| 158 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, | ||
| 159 | .pNext = nullptr, | ||
| 160 | .flags = 0, | ||
| 161 | .depthTestEnable = VK_TRUE, | ||
| 162 | .depthWriteEnable = VK_TRUE, | ||
| 163 | .depthCompareOp = VK_COMPARE_OP_ALWAYS, | ||
| 164 | .depthBoundsTestEnable = VK_FALSE, | ||
| 165 | .stencilTestEnable = VK_FALSE, | ||
| 166 | .front = VkStencilOpState{}, | ||
| 167 | .back = VkStencilOpState{}, | ||
| 168 | .minDepthBounds = 0.0f, | ||
| 169 | .maxDepthBounds = 0.0f, | ||
| 170 | }; | ||
| 171 | |||
| 172 | template <VkFilter filter> | ||
| 173 | inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{ | ||
| 174 | .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, | ||
| 175 | .pNext = nullptr, | ||
| 176 | .flags = 0, | ||
| 177 | .magFilter = filter, | ||
| 178 | .minFilter = filter, | ||
| 179 | .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, | ||
| 180 | .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, | ||
| 181 | .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, | ||
| 182 | .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, | ||
| 183 | .mipLodBias = 0.0f, | ||
| 184 | .anisotropyEnable = VK_FALSE, | ||
| 185 | .maxAnisotropy = 0.0f, | ||
| 186 | .compareEnable = VK_FALSE, | ||
| 187 | .compareOp = VK_COMPARE_OP_NEVER, | ||
| 188 | .minLod = 0.0f, | ||
| 189 | .maxLod = 0.0f, | ||
| 190 | .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE, | ||
| 191 | .unnormalizedCoordinates = VK_TRUE, | ||
| 192 | }; | ||
| 193 | |||
| 194 | constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo( | ||
| 195 | const VkDescriptorSetLayout* set_layout) { | ||
| 196 | return VkPipelineLayoutCreateInfo{ | ||
| 197 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||
| 198 | .pNext = nullptr, | ||
| 199 | .flags = 0, | ||
| 200 | .setLayoutCount = 1, | ||
| 201 | .pSetLayouts = set_layout, | ||
| 202 | .pushConstantRangeCount = 1, | ||
| 203 | .pPushConstantRanges = &PUSH_CONSTANT_RANGE, | ||
| 204 | }; | ||
| 205 | } | ||
| 206 | |||
| 207 | constexpr VkPipelineShaderStageCreateInfo PipelineShaderStageCreateInfo(VkShaderStageFlagBits stage, | ||
| 208 | VkShaderModule shader) { | ||
| 209 | return VkPipelineShaderStageCreateInfo{ | ||
| 210 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 211 | .pNext = nullptr, | ||
| 212 | .flags = 0, | ||
| 213 | .stage = stage, | ||
| 214 | .module = shader, | ||
| 215 | .pName = "main", | ||
| 216 | .pSpecializationInfo = nullptr, | ||
| 217 | }; | ||
| 218 | } | ||
| 219 | |||
| 220 | constexpr std::array<VkPipelineShaderStageCreateInfo, 2> MakeStages( | ||
| 221 | VkShaderModule vertex_shader, VkShaderModule fragment_shader) { | ||
| 222 | return std::array{ | ||
| 223 | PipelineShaderStageCreateInfo(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader), | ||
| 224 | PipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader), | ||
| 225 | }; | ||
| 226 | } | ||
| 227 | |||
| 228 | void UpdateOneTextureDescriptorSet(const Device& device, VkDescriptorSet descriptor_set, | ||
| 229 | VkSampler sampler, VkImageView image_view) { | ||
| 230 | const VkDescriptorImageInfo image_info{ | ||
| 231 | .sampler = sampler, | ||
| 232 | .imageView = image_view, | ||
| 233 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 234 | }; | ||
| 235 | const VkWriteDescriptorSet write_descriptor_set{ | ||
| 236 | .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||
| 237 | .pNext = nullptr, | ||
| 238 | .dstSet = descriptor_set, | ||
| 239 | .dstBinding = 0, | ||
| 240 | .dstArrayElement = 0, | ||
| 241 | .descriptorCount = 1, | ||
| 242 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 243 | .pImageInfo = &image_info, | ||
| 244 | .pBufferInfo = nullptr, | ||
| 245 | .pTexelBufferView = nullptr, | ||
| 246 | }; | ||
| 247 | device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr); | ||
| 248 | } | ||
| 249 | |||
| 250 | void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descriptor_set, | ||
| 251 | VkSampler sampler, VkImageView image_view_0, | ||
| 252 | VkImageView image_view_1) { | ||
| 253 | const VkDescriptorImageInfo image_info_0{ | ||
| 254 | .sampler = sampler, | ||
| 255 | .imageView = image_view_0, | ||
| 256 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 257 | }; | ||
| 258 | const VkDescriptorImageInfo image_info_1{ | ||
| 259 | .sampler = sampler, | ||
| 260 | .imageView = image_view_1, | ||
| 261 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 262 | }; | ||
| 263 | const std::array write_descriptor_sets{ | ||
| 264 | VkWriteDescriptorSet{ | ||
| 265 | .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||
| 266 | .pNext = nullptr, | ||
| 267 | .dstSet = descriptor_set, | ||
| 268 | .dstBinding = 0, | ||
| 269 | .dstArrayElement = 0, | ||
| 270 | .descriptorCount = 1, | ||
| 271 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 272 | .pImageInfo = &image_info_0, | ||
| 273 | .pBufferInfo = nullptr, | ||
| 274 | .pTexelBufferView = nullptr, | ||
| 275 | }, | ||
| 276 | VkWriteDescriptorSet{ | ||
| 277 | .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||
| 278 | .pNext = nullptr, | ||
| 279 | .dstSet = descriptor_set, | ||
| 280 | .dstBinding = 1, | ||
| 281 | .dstArrayElement = 0, | ||
| 282 | .descriptorCount = 1, | ||
| 283 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 284 | .pImageInfo = &image_info_1, | ||
| 285 | .pBufferInfo = nullptr, | ||
| 286 | .pTexelBufferView = nullptr, | ||
| 287 | }, | ||
| 288 | }; | ||
| 289 | device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr); | ||
| 290 | } | ||
| 291 | |||
| 292 | void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, | ||
| 293 | const std::array<Offset2D, 2>& dst_region, | ||
| 294 | const std::array<Offset2D, 2>& src_region) { | ||
| 295 | const VkOffset2D offset{ | ||
| 296 | .x = std::min(dst_region[0].x, dst_region[1].x), | ||
| 297 | .y = std::min(dst_region[0].y, dst_region[1].y), | ||
| 298 | }; | ||
| 299 | const VkExtent2D extent{ | ||
| 300 | .width = static_cast<u32>(std::abs(dst_region[1].x - dst_region[0].x)), | ||
| 301 | .height = static_cast<u32>(std::abs(dst_region[1].y - dst_region[0].y)), | ||
| 302 | }; | ||
| 303 | const VkViewport viewport{ | ||
| 304 | .x = static_cast<float>(offset.x), | ||
| 305 | .y = static_cast<float>(offset.y), | ||
| 306 | .width = static_cast<float>(extent.width), | ||
| 307 | .height = static_cast<float>(extent.height), | ||
| 308 | .minDepth = 0.0f, | ||
| 309 | .maxDepth = 1.0f, | ||
| 310 | }; | ||
| 311 | // TODO: Support scissored blits | ||
| 312 | const VkRect2D scissor{ | ||
| 313 | .offset = offset, | ||
| 314 | .extent = extent, | ||
| 315 | }; | ||
| 316 | const float scale_x = static_cast<float>(src_region[1].x - src_region[0].x); | ||
| 317 | const float scale_y = static_cast<float>(src_region[1].y - src_region[0].y); | ||
| 318 | const PushConstants push_constants{ | ||
| 319 | .tex_scale = {scale_x, scale_y}, | ||
| 320 | .tex_offset = {static_cast<float>(src_region[0].x), static_cast<float>(src_region[0].y)}, | ||
| 321 | }; | ||
| 322 | cmdbuf.SetViewport(0, viewport); | ||
| 323 | cmdbuf.SetScissor(0, scissor); | ||
| 324 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); | ||
| 325 | } | ||
| 326 | |||
| 327 | } // Anonymous namespace | ||
| 328 | |||
| 329 | BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, | ||
| 330 | StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) | ||
| 331 | : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, | ||
| 332 | one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( | ||
| 333 | ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), | ||
| 334 | two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( | ||
| 335 | TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), | ||
| 336 | one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), | ||
| 337 | two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), | ||
| 338 | one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( | ||
| 339 | PipelineLayoutCreateInfo(one_texture_set_layout.address()))), | ||
| 340 | two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( | ||
| 341 | PipelineLayoutCreateInfo(two_textures_set_layout.address()))), | ||
| 342 | full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), | ||
| 343 | blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), | ||
| 344 | convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), | ||
| 345 | convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), | ||
| 346 | linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)), | ||
| 347 | nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) { | ||
| 348 | if (device.IsExtShaderStencilExportSupported()) { | ||
| 349 | blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV); | ||
| 350 | } | ||
| 351 | } | ||
| 352 | |||
| 353 | BlitImageHelper::~BlitImageHelper() = default; | ||
| 354 | |||
| 355 | void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, | ||
| 356 | const std::array<Offset2D, 2>& dst_region, | ||
| 357 | const std::array<Offset2D, 2>& src_region, | ||
| 358 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 359 | Tegra::Engines::Fermi2D::Operation operation) { | ||
| 360 | const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear; | ||
| 361 | const BlitImagePipelineKey key{ | ||
| 362 | .renderpass = dst_framebuffer->RenderPass(), | ||
| 363 | .operation = operation, | ||
| 364 | }; | ||
| 365 | const VkPipelineLayout layout = *one_texture_pipeline_layout; | ||
| 366 | const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); | ||
| 367 | const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; | ||
| 368 | const VkPipeline pipeline = FindOrEmplacePipeline(key); | ||
| 369 | const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); | ||
| 370 | scheduler.RequestRenderpass(dst_framebuffer); | ||
| 371 | scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, | ||
| 372 | &device = device](vk::CommandBuffer cmdbuf) { | ||
| 373 | // TODO: Barriers | ||
| 374 | UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); | ||
| 375 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | ||
| 376 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, | ||
| 377 | nullptr); | ||
| 378 | BindBlitState(cmdbuf, layout, dst_region, src_region); | ||
| 379 | cmdbuf.Draw(3, 1, 0, 0); | ||
| 380 | }); | ||
| 381 | scheduler.InvalidateState(); | ||
| 382 | } | ||
| 383 | |||
| 384 | void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, | ||
| 385 | VkImageView src_depth_view, VkImageView src_stencil_view, | ||
| 386 | const std::array<Offset2D, 2>& dst_region, | ||
| 387 | const std::array<Offset2D, 2>& src_region, | ||
| 388 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 389 | Tegra::Engines::Fermi2D::Operation operation) { | ||
| 390 | ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); | ||
| 391 | ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy); | ||
| 392 | |||
| 393 | const VkPipelineLayout layout = *two_textures_pipeline_layout; | ||
| 394 | const VkSampler sampler = *nearest_sampler; | ||
| 395 | const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); | ||
| 396 | const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); | ||
| 397 | scheduler.RequestRenderpass(dst_framebuffer); | ||
| 398 | scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, | ||
| 399 | src_stencil_view, descriptor_set, | ||
| 400 | &device = device](vk::CommandBuffer cmdbuf) { | ||
| 401 | // TODO: Barriers | ||
| 402 | UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, | ||
| 403 | src_stencil_view); | ||
| 404 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | ||
| 405 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, | ||
| 406 | nullptr); | ||
| 407 | BindBlitState(cmdbuf, layout, dst_region, src_region); | ||
| 408 | cmdbuf.Draw(3, 1, 0, 0); | ||
| 409 | }); | ||
| 410 | scheduler.InvalidateState(); | ||
| 411 | } | ||
| 412 | |||
| 413 | void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, | ||
| 414 | const ImageView& src_image_view) { | ||
| 415 | ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass()); | ||
| 416 | Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view); | ||
| 417 | } | ||
| 418 | |||
| 419 | void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, | ||
| 420 | const ImageView& src_image_view) { | ||
| 421 | |||
| 422 | ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); | ||
| 423 | Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); | ||
| 424 | } | ||
| 425 | |||
| 426 | void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer, | ||
| 427 | const ImageView& src_image_view) { | ||
| 428 | ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass()); | ||
| 429 | Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view); | ||
| 430 | } | ||
| 431 | |||
| 432 | void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, | ||
| 433 | const ImageView& src_image_view) { | ||
| 434 | ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass()); | ||
| 435 | Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view); | ||
| 436 | } | ||
| 437 | |||
| 438 | void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, | ||
| 439 | const ImageView& src_image_view) { | ||
| 440 | const VkPipelineLayout layout = *one_texture_pipeline_layout; | ||
| 441 | const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); | ||
| 442 | const VkSampler sampler = *nearest_sampler; | ||
| 443 | const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); | ||
| 444 | const VkExtent2D extent{ | ||
| 445 | .width = src_image_view.size.width, | ||
| 446 | .height = src_image_view.size.height, | ||
| 447 | }; | ||
| 448 | scheduler.RequestRenderpass(dst_framebuffer); | ||
| 449 | scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, | ||
| 450 | &device = device](vk::CommandBuffer cmdbuf) { | ||
| 451 | const VkOffset2D offset{ | ||
| 452 | .x = 0, | ||
| 453 | .y = 0, | ||
| 454 | }; | ||
| 455 | const VkViewport viewport{ | ||
| 456 | .x = 0.0f, | ||
| 457 | .y = 0.0f, | ||
| 458 | .width = static_cast<float>(extent.width), | ||
| 459 | .height = static_cast<float>(extent.height), | ||
| 460 | .minDepth = 0.0f, | ||
| 461 | .maxDepth = 0.0f, | ||
| 462 | }; | ||
| 463 | const VkRect2D scissor{ | ||
| 464 | .offset = offset, | ||
| 465 | .extent = extent, | ||
| 466 | }; | ||
| 467 | const PushConstants push_constants{ | ||
| 468 | .tex_scale = {viewport.width, viewport.height}, | ||
| 469 | .tex_offset = {0.0f, 0.0f}, | ||
| 470 | }; | ||
| 471 | UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); | ||
| 472 | |||
| 473 | // TODO: Barriers | ||
| 474 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | ||
| 475 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, | ||
| 476 | nullptr); | ||
| 477 | cmdbuf.SetViewport(0, viewport); | ||
| 478 | cmdbuf.SetScissor(0, scissor); | ||
| 479 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); | ||
| 480 | cmdbuf.Draw(3, 1, 0, 0); | ||
| 481 | }); | ||
| 482 | scheduler.InvalidateState(); | ||
| 483 | } | ||
| 484 | |||
| 485 | VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) { | ||
| 486 | const auto it = std::ranges::find(blit_color_keys, key); | ||
| 487 | if (it != blit_color_keys.end()) { | ||
| 488 | return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)]; | ||
| 489 | } | ||
| 490 | blit_color_keys.push_back(key); | ||
| 491 | |||
| 492 | const std::array stages = MakeStages(*full_screen_vert, *blit_color_to_color_frag); | ||
| 493 | const VkPipelineColorBlendAttachmentState blend_attachment{ | ||
| 494 | .blendEnable = VK_FALSE, | ||
| 495 | .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 496 | .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 497 | .colorBlendOp = VK_BLEND_OP_ADD, | ||
| 498 | .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 499 | .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 500 | .alphaBlendOp = VK_BLEND_OP_ADD, | ||
| 501 | .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | | ||
| 502 | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, | ||
| 503 | }; | ||
| 504 | // TODO: programmable blending | ||
| 505 | const VkPipelineColorBlendStateCreateInfo color_blend_create_info{ | ||
| 506 | .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | ||
| 507 | .pNext = nullptr, | ||
| 508 | .flags = 0, | ||
| 509 | .logicOpEnable = VK_FALSE, | ||
| 510 | .logicOp = VK_LOGIC_OP_CLEAR, | ||
| 511 | .attachmentCount = 1, | ||
| 512 | .pAttachments = &blend_attachment, | ||
| 513 | .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, | ||
| 514 | }; | ||
| 515 | blit_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ | ||
| 516 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 517 | .pNext = nullptr, | ||
| 518 | .flags = 0, | ||
| 519 | .stageCount = static_cast<u32>(stages.size()), | ||
| 520 | .pStages = stages.data(), | ||
| 521 | .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 522 | .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 523 | .pTessellationState = nullptr, | ||
| 524 | .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 525 | .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 526 | .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 527 | .pDepthStencilState = nullptr, | ||
| 528 | .pColorBlendState = &color_blend_create_info, | ||
| 529 | .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 530 | .layout = *one_texture_pipeline_layout, | ||
| 531 | .renderPass = key.renderpass, | ||
| 532 | .subpass = 0, | ||
| 533 | .basePipelineHandle = VK_NULL_HANDLE, | ||
| 534 | .basePipelineIndex = 0, | ||
| 535 | })); | ||
| 536 | return *blit_color_pipelines.back(); | ||
| 537 | } | ||
| 538 | |||
| 539 | VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) { | ||
| 540 | if (blit_depth_stencil_pipeline) { | ||
| 541 | return *blit_depth_stencil_pipeline; | ||
| 542 | } | ||
| 543 | const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag); | ||
| 544 | blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({ | ||
| 545 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 546 | .pNext = nullptr, | ||
| 547 | .flags = 0, | ||
| 548 | .stageCount = static_cast<u32>(stages.size()), | ||
| 549 | .pStages = stages.data(), | ||
| 550 | .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 551 | .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 552 | .pTessellationState = nullptr, | ||
| 553 | .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 554 | .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 555 | .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 556 | .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, | ||
| 557 | .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, | ||
| 558 | .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 559 | .layout = *two_textures_pipeline_layout, | ||
| 560 | .renderPass = renderpass, | ||
| 561 | .subpass = 0, | ||
| 562 | .basePipelineHandle = VK_NULL_HANDLE, | ||
| 563 | .basePipelineIndex = 0, | ||
| 564 | }); | ||
| 565 | return *blit_depth_stencil_pipeline; | ||
| 566 | } | ||
| 567 | |||
| 568 | void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { | ||
| 569 | if (pipeline) { | ||
| 570 | return; | ||
| 571 | } | ||
| 572 | const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag); | ||
| 573 | pipeline = device.GetLogical().CreateGraphicsPipeline({ | ||
| 574 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 575 | .pNext = nullptr, | ||
| 576 | .flags = 0, | ||
| 577 | .stageCount = static_cast<u32>(stages.size()), | ||
| 578 | .pStages = stages.data(), | ||
| 579 | .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 580 | .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 581 | .pTessellationState = nullptr, | ||
| 582 | .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 583 | .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 584 | .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 585 | .pDepthStencilState = nullptr, | ||
| 586 | .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, | ||
| 587 | .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 588 | .layout = *one_texture_pipeline_layout, | ||
| 589 | .renderPass = renderpass, | ||
| 590 | .subpass = 0, | ||
| 591 | .basePipelineHandle = VK_NULL_HANDLE, | ||
| 592 | .basePipelineIndex = 0, | ||
| 593 | }); | ||
| 594 | } | ||
| 595 | |||
| 596 | void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { | ||
| 597 | if (pipeline) { | ||
| 598 | return; | ||
| 599 | } | ||
| 600 | const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag); | ||
| 601 | pipeline = device.GetLogical().CreateGraphicsPipeline({ | ||
| 602 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 603 | .pNext = nullptr, | ||
| 604 | .flags = 0, | ||
| 605 | .stageCount = static_cast<u32>(stages.size()), | ||
| 606 | .pStages = stages.data(), | ||
| 607 | .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 608 | .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 609 | .pTessellationState = nullptr, | ||
| 610 | .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 611 | .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 612 | .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 613 | .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, | ||
| 614 | .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, | ||
| 615 | .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 616 | .layout = *one_texture_pipeline_layout, | ||
| 617 | .renderPass = renderpass, | ||
| 618 | .subpass = 0, | ||
| 619 | .basePipelineHandle = VK_NULL_HANDLE, | ||
| 620 | .basePipelineIndex = 0, | ||
| 621 | }); | ||
| 622 | } | ||
| 623 | |||
| 624 | } // namespace Vulkan | ||
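BindBlitState above sizes the viewport and scissor from the absolute extent of dst_region and pushes the raw source extent and origin as tex_scale/tex_offset; since the push-constant range targets the vertex stage and both samplers use unnormalizedCoordinates = VK_TRUE, the full-screen triangle's vertex shader can presumably emit texture coordinates as tex_offset + uv * tex_scale directly in texel space (the SPIR-V shaders themselves are not part of this file). A worked sketch of that arithmetic with made-up regions:

#include <array>
#include <cstdio>

struct Offset2D {
    int x, y;
};

int main() {
    // Hypothetical blit: source rectangle (32,16)-(96,80), whatever the destination.
    const std::array<Offset2D, 2> src_region{{{32, 16}, {96, 80}}};
    const float scale_x = static_cast<float>(src_region[1].x - src_region[0].x); // 64
    const float scale_y = static_cast<float>(src_region[1].y - src_region[0].y); // 64
    // Normalized uv = (0.5, 0.25) lands on source texel
    // tex_offset + uv * tex_scale = (32 + 32, 16 + 16) = (64, 32).
    std::printf("(%g, %g)\n", src_region[0].x + 0.5f * scale_x,
                src_region[0].y + 0.25f * scale_y);
    return 0;
}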
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
new file mode 100644
index 000000000..43fd3d737
--- /dev/null
+++ b/src/video_core/renderer_vulkan/blit_image.h
| @@ -0,0 +1,96 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <compare> | ||
| 8 | |||
| 9 | #include "video_core/engines/fermi_2d.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 11 | #include "video_core/texture_cache/types.h" | ||
| 12 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 13 | |||
| 14 | namespace Vulkan { | ||
| 15 | |||
| 16 | using VideoCommon::Offset2D; | ||
| 17 | |||
| 18 | class Device; | ||
| 19 | class Framebuffer; | ||
| 20 | class ImageView; | ||
| 21 | class StateTracker; | ||
| 22 | class VKScheduler; | ||
| 23 | |||
| 24 | struct BlitImagePipelineKey { | ||
| 25 | constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default; | ||
| 26 | |||
| 27 | VkRenderPass renderpass; | ||
| 28 | Tegra::Engines::Fermi2D::Operation operation; | ||
| 29 | }; | ||
| 30 | |||
| 31 | class BlitImageHelper { | ||
| 32 | public: | ||
| 33 | explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, | ||
| 34 | StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); | ||
| 35 | ~BlitImageHelper(); | ||
| 36 | |||
| 37 | void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, | ||
| 38 | const std::array<Offset2D, 2>& dst_region, | ||
| 39 | const std::array<Offset2D, 2>& src_region, | ||
| 40 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 41 | Tegra::Engines::Fermi2D::Operation operation); | ||
| 42 | |||
| 43 | void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, | ||
| 44 | VkImageView src_stencil_view, const std::array<Offset2D, 2>& dst_region, | ||
| 45 | const std::array<Offset2D, 2>& src_region, | ||
| 46 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 47 | Tegra::Engines::Fermi2D::Operation operation); | ||
| 48 | |||
| 49 | void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); | ||
| 50 | |||
| 51 | void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); | ||
| 52 | |||
| 53 | void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); | ||
| 54 | |||
| 55 | void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); | ||
| 56 | |||
| 57 | private: | ||
| 58 | void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, | ||
| 59 | const ImageView& src_image_view); | ||
| 60 | |||
| 61 | [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key); | ||
| 62 | |||
| 63 | [[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass); | ||
| 64 | |||
| 65 | void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); | ||
| 66 | |||
| 67 | void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); | ||
| 68 | |||
| 69 | const Device& device; | ||
| 70 | VKScheduler& scheduler; | ||
| 71 | StateTracker& state_tracker; | ||
| 72 | |||
| 73 | vk::DescriptorSetLayout one_texture_set_layout; | ||
| 74 | vk::DescriptorSetLayout two_textures_set_layout; | ||
| 75 | DescriptorAllocator one_texture_descriptor_allocator; | ||
| 76 | DescriptorAllocator two_textures_descriptor_allocator; | ||
| 77 | vk::PipelineLayout one_texture_pipeline_layout; | ||
| 78 | vk::PipelineLayout two_textures_pipeline_layout; | ||
| 79 | vk::ShaderModule full_screen_vert; | ||
| 80 | vk::ShaderModule blit_color_to_color_frag; | ||
| 81 | vk::ShaderModule blit_depth_stencil_frag; | ||
| 82 | vk::ShaderModule convert_depth_to_float_frag; | ||
| 83 | vk::ShaderModule convert_float_to_depth_frag; | ||
| 84 | vk::Sampler linear_sampler; | ||
| 85 | vk::Sampler nearest_sampler; | ||
| 86 | |||
| 87 | std::vector<BlitImagePipelineKey> blit_color_keys; | ||
| 88 | std::vector<vk::Pipeline> blit_color_pipelines; | ||
| 89 | vk::Pipeline blit_depth_stencil_pipeline; | ||
| 90 | vk::Pipeline convert_d32_to_r32_pipeline; | ||
| 91 | vk::Pipeline convert_r32_to_d32_pipeline; | ||
| 92 | vk::Pipeline convert_d16_to_r16_pipeline; | ||
| 93 | vk::Pipeline convert_r16_to_d16_pipeline; | ||
| 94 | }; | ||
| 95 | |||
| 96 | } // namespace Vulkan | ||
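FindOrEmplacePipeline above caches one color-blit pipeline per (renderpass, operation) pair in two parallel vectors with a linear scan; defaulting operator<=> on BlitImagePipelineKey also generates the operator== that std::ranges::find relies on. The same pattern in isolation, with stand-in key and handle types rather than the project's Vulkan wrappers:

#include <algorithm>
#include <compare>
#include <cstddef>
#include <vector>

struct Key {
    int renderpass; // stand-in for VkRenderPass
    int operation;  // stand-in for Tegra::Engines::Fermi2D::Operation
    // Defaulted <=> implies a defaulted ==, which std::ranges::find uses.
    constexpr auto operator<=>(const Key&) const noexcept = default;
};

class PipelineCache {
public:
    // Returns the cached handle for key, creating it on first use.
    int FindOrEmplace(const Key& key) {
        const auto it = std::ranges::find(keys, key);
        if (it != keys.end()) {
            return pipelines[static_cast<std::size_t>(it - keys.begin())];
        }
        keys.push_back(key);
        pipelines.push_back(next_handle++); // stub for pipeline creation
        return pipelines.back();
    }

private:
    int next_handle = 1;
    std::vector<Key> keys;
    std::vector<int> pipelines;
};

A linear scan is reasonable here because a frame only ever produces a handful of distinct renderpass/operation combinations.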
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 5ec43db11..5be6dabd9 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
| @@ -60,6 +60,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta | |||
| 60 | logic_op.Assign(PackLogicOp(regs.logic_op.operation)); | 60 | logic_op.Assign(PackLogicOp(regs.logic_op.operation)); |
| 61 | rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); | 61 | rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); |
| 62 | topology.Assign(regs.draw.topology); | 62 | topology.Assign(regs.draw.topology); |
| 63 | msaa_mode.Assign(regs.multisample_mode); | ||
| 63 | 64 | ||
| 64 | raw2 = 0; | 65 | raw2 = 0; |
| 65 | const auto test_func = | 66 | const auto test_func = |
| @@ -75,7 +76,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta | |||
| 75 | regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0; | 76 | regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0; |
| 76 | } | 77 | } |
| 77 | 78 | ||
| 78 | for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { | 79 | for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { |
| 79 | const auto& input = regs.vertex_attrib_format[index]; | 80 | const auto& input = regs.vertex_attrib_format[index]; |
| 80 | auto& attribute = attributes[index]; | 81 | auto& attribute = attributes[index]; |
| 81 | attribute.raw = 0; | 82 | attribute.raw = 0; |
| @@ -84,6 +85,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta | |||
| 84 | attribute.offset.Assign(input.offset); | 85 | attribute.offset.Assign(input.offset); |
| 85 | attribute.type.Assign(static_cast<u32>(input.type.Value())); | 86 | attribute.type.Assign(static_cast<u32>(input.type.Value())); |
| 86 | attribute.size.Assign(static_cast<u32>(input.size.Value())); | 87 | attribute.size.Assign(static_cast<u32>(input.size.Value())); |
| 88 | attribute.binding_index_enabled.Assign(regs.vertex_array[index].IsEnabled() ? 1 : 0); | ||
| 87 | } | 89 | } |
| 88 | 90 | ||
| 89 | for (std::size_t index = 0; index < std::size(attachments); ++index) { | 91 | for (std::size_t index = 0; index < std::size(attachments); ++index) { |
| @@ -171,14 +173,9 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { | |||
| 171 | depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); | 173 | depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); |
| 172 | cull_face.Assign(PackCullFace(regs.cull_face)); | 174 | cull_face.Assign(PackCullFace(regs.cull_face)); |
| 173 | cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); | 175 | cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); |
| 174 | 176 | std::ranges::transform(regs.vertex_array, vertex_strides.begin(), [](const auto& array) { | |
| 175 | for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | 177 | return static_cast<u16>(array.stride.Value()); |
| 176 | const auto& input = regs.vertex_array[index]; | 178 | }); |
| 177 | VertexBinding& binding = vertex_bindings[index]; | ||
| 178 | binding.raw = 0; | ||
| 179 | binding.enabled.Assign(input.IsEnabled() ? 1 : 0); | ||
| 180 | binding.stride.Assign(static_cast<u16>(input.stride.Value())); | ||
| 181 | } | ||
| 182 | } | 179 | } |
| 183 | 180 | ||
| 184 | std::size_t FixedPipelineState::Hash() const noexcept { | 181 | std::size_t FixedPipelineState::Hash() const noexcept { |
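In DynamicState::Fill above, the per-binding loop collapses into one std::ranges::transform now that only the stride survives in dynamic state; the enable bit moves into the attribute word instead (see the header diff below). A reduced equivalent with a stand-in register array:

#include <algorithm>
#include <array>
#include <cstdint>

struct VertexArray {
    std::uint32_t stride; // 12-bit value in hardware, so it always fits in u16
};

int main() {
    const std::array<VertexArray, 4> regs{{{16}, {32}, {0}, {12}}};
    std::array<std::uint16_t, 4> vertex_strides{};
    // Same shape as the std::ranges::transform in DynamicState::Fill.
    std::ranges::transform(regs, vertex_strides.begin(), [](const VertexArray& array) {
        return static_cast<std::uint16_t>(array.stride);
    });
    return 0;
}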
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index c26b77790..465a55fdb 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
| @@ -96,6 +96,8 @@ struct FixedPipelineState { | |||
| 96 | BitField<6, 14, u32> offset; | 96 | BitField<6, 14, u32> offset; |
| 97 | BitField<20, 3, u32> type; | 97 | BitField<20, 3, u32> type; |
| 98 | BitField<23, 6, u32> size; | 98 | BitField<23, 6, u32> size; |
| 99 | // Not really an element of a vertex attribute, but it can be packed here | ||
| 100 | BitField<29, 1, u32> binding_index_enabled; | ||
| 99 | 101 | ||
| 100 | constexpr Maxwell::VertexAttribute::Type Type() const noexcept { | 102 | constexpr Maxwell::VertexAttribute::Type Type() const noexcept { |
| 101 | return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); | 103 | return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); |
| @@ -130,12 +132,6 @@ struct FixedPipelineState { | |||
| 130 | } | 132 | } |
| 131 | }; | 133 | }; |
| 132 | 134 | ||
| 133 | union VertexBinding { | ||
| 134 | u16 raw; | ||
| 135 | BitField<0, 12, u16> stride; | ||
| 136 | BitField<12, 1, u16> enabled; | ||
| 137 | }; | ||
| 138 | |||
| 139 | struct DynamicState { | 135 | struct DynamicState { |
| 140 | union { | 136 | union { |
| 141 | u32 raw1; | 137 | u32 raw1; |
| @@ -153,7 +149,8 @@ struct FixedPipelineState { | |||
| 153 | BitField<0, 2, u32> cull_face; | 149 | BitField<0, 2, u32> cull_face; |
| 154 | BitField<2, 1, u32> cull_enable; | 150 | BitField<2, 1, u32> cull_enable; |
| 155 | }; | 151 | }; |
| 156 | std::array<VertexBinding, Maxwell::NumVertexArrays> vertex_bindings; | 152 | // Vertex stride is a 12 bits value, we have 4 bits to spare per element |
| 153 | std::array<u16, Maxwell::NumVertexArrays> vertex_strides; | ||
| 157 | 154 | ||
| 158 | void Fill(const Maxwell& regs); | 155 | void Fill(const Maxwell& regs); |
| 159 | 156 | ||
| @@ -186,6 +183,7 @@ struct FixedPipelineState { | |||
| 186 | BitField<19, 4, u32> logic_op; | 183 | BitField<19, 4, u32> logic_op; |
| 187 | BitField<23, 1, u32> rasterize_enable; | 184 | BitField<23, 1, u32> rasterize_enable; |
| 188 | BitField<24, 4, Maxwell::PrimitiveTopology> topology; | 185 | BitField<24, 4, Maxwell::PrimitiveTopology> topology; |
| 186 | BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; | ||
| 189 | }; | 187 | }; |
| 190 | union { | 188 | union { |
| 191 | u32 raw2; | 189 | u32 raw2; |
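The attribute word above has bit 29 to spare, so binding_index_enabled can ride along with the attribute fields; packing it there is what lets the old VertexBinding union disappear while keeping the flag inside data that FixedPipelineState already hashes and compares. A hand-rolled sketch of the single-bit pack/unpack that BitField<29, 1, u32> performs generically:

#include <cstdint>

// Equivalent of BitField<29, 1, u32> binding_index_enabled, written by hand.
constexpr std::uint32_t SetBindingEnabled(std::uint32_t raw, bool enabled) {
    constexpr std::uint32_t mask = std::uint32_t{1} << 29;
    return enabled ? (raw | mask) : (raw & ~mask);
}

constexpr bool IsBindingEnabled(std::uint32_t raw) {
    return ((raw >> 29) & 1u) != 0;
}

static_assert(SetBindingEnabled(0u, true) == 0x2000'0000u);
static_assert(IsBindingEnabled(SetBindingEnabled(0u, true)));
static_assert(!IsBindingEnabled(SetBindingEnabled(0xFFFF'FFFFu, false)));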
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 58e117eb3..ca7c2c579 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
| @@ -9,9 +9,9 @@ | |||
| 9 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 10 | #include "video_core/engines/maxwell_3d.h" | 10 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 11 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 12 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 13 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 14 | #include "video_core/surface.h" | 12 | #include "video_core/surface.h" |
| 13 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 15 | 15 | ||
| 16 | namespace Vulkan::MaxwellToVK { | 16 | namespace Vulkan::MaxwellToVK { |
| 17 | 17 | ||
| @@ -47,7 +47,7 @@ VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter | |||
| 47 | return {}; | 47 | return {}; |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, | 50 | VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wrap_mode, |
| 51 | Tegra::Texture::TextureFilter filter) { | 51 | Tegra::Texture::TextureFilter filter) { |
| 52 | switch (wrap_mode) { | 52 | switch (wrap_mode) { |
| 53 | case Tegra::Texture::WrapMode::Wrap: | 53 | case Tegra::Texture::WrapMode::Wrap: |
| @@ -122,7 +122,7 @@ struct FormatTuple { | |||
| 122 | {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT | 122 | {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT |
| 123 | {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT | 123 | {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT |
| 124 | {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM | 124 | {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM |
| 125 | {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM | 125 | {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5_UNORM |
| 126 | {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM | 126 | {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM |
| 127 | {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM | 127 | {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM |
| 128 | {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT | 128 | {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT |
| @@ -163,7 +163,7 @@ struct FormatTuple { | |||
| 163 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM | 163 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM |
| 164 | {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT | 164 | {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT |
| 165 | {VK_FORMAT_UNDEFINED}, // R16G16_UINT | 165 | {VK_FORMAT_UNDEFINED}, // R16G16_UINT |
| 166 | {VK_FORMAT_UNDEFINED}, // R16G16_SINT | 166 | {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT |
| 167 | {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM | 167 | {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM |
| 168 | {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT | 168 | {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT |
| 169 | {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB | 169 | {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB |
| @@ -222,7 +222,7 @@ constexpr bool IsZetaFormat(PixelFormat pixel_format) { | |||
| 222 | 222 | ||
| 223 | } // Anonymous namespace | 223 | } // Anonymous namespace |
| 224 | 224 | ||
| 225 | FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format) { | 225 | FormatInfo SurfaceFormat(const Device& device, FormatType format_type, PixelFormat pixel_format) { |
| 226 | ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples)); | 226 | ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples)); |
| 227 | 227 | ||
| 228 | auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)]; | 228 | auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)]; |
| @@ -233,18 +233,20 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo | |||
| 233 | 233 | ||
| 234 | // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively | 234 | // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively |
| 235 | if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { | 235 | if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { |
| 236 | tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) | 236 | const bool is_srgb = VideoCore::Surface::IsPixelFormatSRGB(pixel_format); |
| 237 | ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 | 237 | tuple.format = is_srgb ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 : VK_FORMAT_A8B8G8R8_UNORM_PACK32; |
| 238 | : VK_FORMAT_A8B8G8R8_UNORM_PACK32; | ||
| 239 | } | 238 | } |
| 240 | const bool attachable = tuple.usage & Attachable; | 239 | const bool attachable = tuple.usage & Attachable; |
| 241 | const bool storage = tuple.usage & Storage; | 240 | const bool storage = tuple.usage & Storage; |
| 242 | 241 | ||
| 243 | VkFormatFeatureFlags usage; | 242 | VkFormatFeatureFlags usage{}; |
| 244 | if (format_type == FormatType::Buffer) { | 243 | switch (format_type) { |
| 244 | case FormatType::Buffer: | ||
| 245 | usage = | 245 | usage = |
| 246 | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; | 246 | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; |
| 247 | } else { | 247 | break; |
| 248 | case FormatType::Linear: | ||
| 249 | case FormatType::Optimal: | ||
| 248 | usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | | 250 | usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | |
| 249 | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT; | 251 | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT; |
| 250 | if (attachable) { | 252 | if (attachable) { |
| @@ -254,6 +256,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo | |||
| 254 | if (storage) { | 256 | if (storage) { |
| 255 | usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; | 257 | usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; |
| 256 | } | 258 | } |
| 259 | break; | ||
| 257 | } | 260 | } |
| 258 | return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; | 261 | return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; |
| 259 | } | 262 | } |
| @@ -277,7 +280,7 @@ VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { | |||
| 277 | return {}; | 280 | return {}; |
| 278 | } | 281 | } |
| 279 | 282 | ||
| 280 | VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, | 283 | VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device, |
| 281 | Maxwell::PrimitiveTopology topology) { | 284 | Maxwell::PrimitiveTopology topology) { |
| 282 | switch (topology) { | 285 | switch (topology) { |
| 283 | case Maxwell::PrimitiveTopology::Points: | 286 | case Maxwell::PrimitiveTopology::Points: |
| @@ -523,7 +526,7 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { | |||
| 523 | return {}; | 526 | return {}; |
| 524 | } | 527 | } |
| 525 | 528 | ||
| 526 | VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) { | 529 | VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) { |
| 527 | switch (index_format) { | 530 | switch (index_format) { |
| 528 | case Maxwell::IndexFormat::UnsignedByte: | 531 | case Maxwell::IndexFormat::UnsignedByte: |
| 529 | if (!device.IsExtIndexTypeUint8Supported()) { | 532 | if (!device.IsExtIndexTypeUint8Supported()) { |
| @@ -724,4 +727,17 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) | |||
| 724 | return {}; | 727 | return {}; |
| 725 | } | 728 | } |
| 726 | 729 | ||
| 730 | VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction) { | ||
| 731 | switch (reduction) { | ||
| 732 | case Tegra::Texture::SamplerReduction::WeightedAverage: | ||
| 733 | return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; | ||
| 734 | case Tegra::Texture::SamplerReduction::Min: | ||
| 735 | return VK_SAMPLER_REDUCTION_MODE_MIN_EXT; | ||
| 736 | case Tegra::Texture::SamplerReduction::Max: | ||
| 737 | return VK_SAMPLER_REDUCTION_MODE_MAX_EXT; | ||
| 738 | } | ||
| 739 | UNREACHABLE_MSG("Invalid sampler mode={}", static_cast<int>(reduction)); | ||
| 740 | return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; | ||
| 741 | } | ||
| 742 | |||
| 727 | } // namespace Vulkan::MaxwellToVK | 743 | } // namespace Vulkan::MaxwellToVK |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7e213452f..537969840 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
| @@ -6,10 +6,10 @@ | |||
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 9 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 10 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 11 | #include "video_core/surface.h" | 9 | #include "video_core/surface.h" |
| 12 | #include "video_core/textures/texture.h" | 10 | #include "video_core/textures/texture.h" |
| 11 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 12 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 13 | 13 | ||
| 14 | namespace Vulkan::MaxwellToVK { | 14 | namespace Vulkan::MaxwellToVK { |
| 15 | 15 | ||
| @@ -22,7 +22,7 @@ VkFilter Filter(Tegra::Texture::TextureFilter filter); | |||
| 22 | 22 | ||
| 23 | VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); | 23 | VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); |
| 24 | 24 | ||
| 25 | VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, | 25 | VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wrap_mode, |
| 26 | Tegra::Texture::TextureFilter filter); | 26 | Tegra::Texture::TextureFilter filter); |
| 27 | 27 | ||
| 28 | VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); | 28 | VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); |
| @@ -35,17 +35,17 @@ struct FormatInfo { | |||
| 35 | bool storage; | 35 | bool storage; |
| 36 | }; | 36 | }; |
| 37 | 37 | ||
| 38 | FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format); | 38 | FormatInfo SurfaceFormat(const Device& device, FormatType format_type, PixelFormat pixel_format); |
| 39 | 39 | ||
| 40 | VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); | 40 | VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); |
| 41 | 41 | ||
| 42 | VkPrimitiveTopology PrimitiveTopology(const VKDevice& device, Maxwell::PrimitiveTopology topology); | 42 | VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); |
| 43 | 43 | ||
| 44 | VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size); | 44 | VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size); |
| 45 | 45 | ||
| 46 | VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); | 46 | VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); |
| 47 | 47 | ||
| 48 | VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format); | 48 | VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format); |
| 49 | 49 | ||
| 50 | VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); | 50 | VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); |
| 51 | 51 | ||
| @@ -61,4 +61,6 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); | |||
| 61 | 61 | ||
| 62 | VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); | 62 | VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); |
| 63 | 63 | ||
| 64 | VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); | ||
| 65 | |||
| 64 | } // namespace Vulkan::MaxwellToVK | 66 | } // namespace Vulkan::MaxwellToVK |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index ea4b7c1e6..d7437e185 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -12,8 +12,6 @@ | |||
| 12 | 12 | ||
| 13 | #include <fmt/format.h> | 13 | #include <fmt/format.h> |
| 14 | 14 | ||
| 15 | #include "common/dynamic_library.h" | ||
| 16 | #include "common/file_util.h" | ||
| 17 | #include "common/logging/log.h" | 15 | #include "common/logging/log.h" |
| 18 | #include "common/telemetry.h" | 16 | #include "common/telemetry.h" |
| 19 | #include "core/core.h" | 17 | #include "core/core.h" |
| @@ -24,182 +22,27 @@ | |||
| 24 | #include "video_core/gpu.h" | 22 | #include "video_core/gpu.h" |
| 25 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 23 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 26 | #include "video_core/renderer_vulkan/vk_blit_screen.h" | 24 | #include "video_core/renderer_vulkan/vk_blit_screen.h" |
| 27 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | 25 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" |
| 29 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 26 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 30 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 27 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 31 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 28 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 32 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | 29 | #include "video_core/renderer_vulkan/vk_state_tracker.h" |
| 33 | #include "video_core/renderer_vulkan/vk_swapchain.h" | 30 | #include "video_core/renderer_vulkan/vk_swapchain.h" |
| 34 | #include "video_core/renderer_vulkan/wrapper.h" | 31 | #include "video_core/vulkan_common/vulkan_debug_callback.h" |
| 35 | 32 | #include "video_core/vulkan_common/vulkan_device.h" | |
| 36 | // Include these late to avoid polluting previous headers | 33 | #include "video_core/vulkan_common/vulkan_instance.h" |
| 37 | #ifdef _WIN32 | 34 | #include "video_core/vulkan_common/vulkan_library.h" |
| 38 | #include <windows.h> | 35 | #include "video_core/vulkan_common/vulkan_surface.h" |
| 39 | // ensure include order | 36 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 40 | #include <vulkan/vulkan_win32.h> | ||
| 41 | #endif | ||
| 42 | |||
| 43 | #if !defined(_WIN32) && !defined(__APPLE__) | ||
| 44 | #include <X11/Xlib.h> | ||
| 45 | #include <vulkan/vulkan_wayland.h> | ||
| 46 | #include <vulkan/vulkan_xlib.h> | ||
| 47 | #endif | ||
| 48 | 37 | ||
| 49 | namespace Vulkan { | 38 | namespace Vulkan { |
| 50 | |||
| 51 | namespace { | 39 | namespace { |
| 52 | |||
| 53 | using Core::Frontend::WindowSystemType; | ||
| 54 | |||
| 55 | VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, | ||
| 56 | VkDebugUtilsMessageTypeFlagsEXT type, | ||
| 57 | const VkDebugUtilsMessengerCallbackDataEXT* data, | ||
| 58 | [[maybe_unused]] void* user_data) { | ||
| 59 | const char* const message{data->pMessage}; | ||
| 60 | |||
| 61 | if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { | ||
| 62 | LOG_CRITICAL(Render_Vulkan, "{}", message); | ||
| 63 | } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) { | ||
| 64 | LOG_WARNING(Render_Vulkan, "{}", message); | ||
| 65 | } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) { | ||
| 66 | LOG_INFO(Render_Vulkan, "{}", message); | ||
| 67 | } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) { | ||
| 68 | LOG_DEBUG(Render_Vulkan, "{}", message); | ||
| 69 | } | ||
| 70 | return VK_FALSE; | ||
| 71 | } | ||
| 72 | |||
| 73 | Common::DynamicLibrary OpenVulkanLibrary() { | ||
| 74 | Common::DynamicLibrary library; | ||
| 75 | #ifdef __APPLE__ | ||
| 76 | // Check if a path to a specific Vulkan library has been specified. | ||
| 77 | char* libvulkan_env = getenv("LIBVULKAN_PATH"); | ||
| 78 | if (!libvulkan_env || !library.Open(libvulkan_env)) { | ||
| 79 | // Use the libvulkan.dylib from the application bundle. | ||
| 80 | const std::string filename = | ||
| 81 | Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; | ||
| 82 | library.Open(filename.c_str()); | ||
| 83 | } | ||
| 84 | #else | ||
| 85 | std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1); | ||
| 86 | if (!library.Open(filename.c_str())) { | ||
| 87 | // Android devices may not have libvulkan.so.1, only libvulkan.so. | ||
| 88 | filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); | ||
| 89 | (void)library.Open(filename.c_str()); | ||
| 90 | } | ||
| 91 | #endif | ||
| 92 | return library; | ||
| 93 | } | ||
| 94 | |||
| 95 | std::pair<vk::Instance, u32> CreateInstance( | ||
| 96 | Common::DynamicLibrary& library, vk::InstanceDispatch& dld, | ||
| 97 | WindowSystemType window_type = WindowSystemType::Headless, bool enable_layers = false) { | ||
| 98 | if (!library.IsOpen()) { | ||
| 99 | LOG_ERROR(Render_Vulkan, "Vulkan library not available"); | ||
| 100 | return {}; | ||
| 101 | } | ||
| 102 | if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) { | ||
| 103 | LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan"); | ||
| 104 | return {}; | ||
| 105 | } | ||
| 106 | if (!vk::Load(dld)) { | ||
| 107 | LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); | ||
| 108 | return {}; | ||
| 109 | } | ||
| 110 | |||
| 111 | std::vector<const char*> extensions; | ||
| 112 | extensions.reserve(6); | ||
| 113 | switch (window_type) { | ||
| 114 | case Core::Frontend::WindowSystemType::Headless: | ||
| 115 | break; | ||
| 116 | #ifdef _WIN32 | ||
| 117 | case Core::Frontend::WindowSystemType::Windows: | ||
| 118 | extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); | ||
| 119 | break; | ||
| 120 | #endif | ||
| 121 | #if !defined(_WIN32) && !defined(__APPLE__) | ||
| 122 | case Core::Frontend::WindowSystemType::X11: | ||
| 123 | extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); | ||
| 124 | break; | ||
| 125 | case Core::Frontend::WindowSystemType::Wayland: | ||
| 126 | extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); | ||
| 127 | break; | ||
| 128 | #endif | ||
| 129 | default: | ||
| 130 | LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); | ||
| 131 | break; | ||
| 132 | } | ||
| 133 | if (window_type != Core::Frontend::WindowSystemType::Headless) { | ||
| 134 | extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); | ||
| 135 | } | ||
| 136 | if (enable_layers) { | ||
| 137 | extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); | ||
| 138 | } | ||
| 139 | extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); | ||
| 140 | |||
| 141 | const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); | ||
| 142 | if (!properties) { | ||
| 143 | LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); | ||
| 144 | return {}; | ||
| 145 | } | ||
| 146 | |||
| 147 | for (const char* extension : extensions) { | ||
| 148 | const auto it = | ||
| 149 | std::find_if(properties->begin(), properties->end(), [extension](const auto& prop) { | ||
| 150 | return !std::strcmp(extension, prop.extensionName); | ||
| 151 | }); | ||
| 152 | if (it == properties->end()) { | ||
| 153 | LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); | ||
| 154 | return {}; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | std::vector<const char*> layers; | ||
| 159 | layers.reserve(1); | ||
| 160 | if (enable_layers) { | ||
| 161 | layers.push_back("VK_LAYER_KHRONOS_validation"); | ||
| 162 | } | ||
| 163 | |||
| 164 | const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld); | ||
| 165 | if (!layer_properties) { | ||
| 166 | LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers"); | ||
| 167 | layers.clear(); | ||
| 168 | } | ||
| 169 | |||
| 170 | for (auto layer_it = layers.begin(); layer_it != layers.end();) { | ||
| 171 | const char* const layer = *layer_it; | ||
| 172 | const auto it = std::find_if( | ||
| 173 | layer_properties->begin(), layer_properties->end(), | ||
| 174 | [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); }); | ||
| 175 | if (it == layer_properties->end()) { | ||
| 176 | LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer); | ||
| 177 | layer_it = layers.erase(layer_it); | ||
| 178 | } else { | ||
| 179 | ++layer_it; | ||
| 180 | } | ||
| 181 | } | ||
| 182 | |||
| 183 | // Limit the maximum version of Vulkan to avoid using untested version. | ||
| 184 | const u32 version = std::min(vk::AvailableVersion(dld), static_cast<u32>(VK_API_VERSION_1_1)); | ||
| 185 | |||
| 186 | vk::Instance instance = vk::Instance::Create(version, layers, extensions, dld); | ||
| 187 | if (!instance) { | ||
| 188 | LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); | ||
| 189 | return {}; | ||
| 190 | } | ||
| 191 | if (!vk::Load(*instance, dld)) { | ||
| 192 | LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); | ||
| 193 | } | ||
| 194 | return std::make_pair(std::move(instance), version); | ||
| 195 | } | ||
| 196 | |||
| 197 | std::string GetReadableVersion(u32 version) { | 40 | std::string GetReadableVersion(u32 version) { |
| 198 | return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), | 41 | return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), |
| 199 | VK_VERSION_PATCH(version)); | 42 | VK_VERSION_PATCH(version)); |
| 200 | } | 43 | } |
| 201 | 44 | ||
| 202 | std::string GetDriverVersion(const VKDevice& device) { | 45 | std::string GetDriverVersion(const Device& device) { |
| 203 | // Extracted from | 46 | // Extracted from |
| 204 | // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314 | 47 | // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314 |
| 205 | const u32 version = device.GetDriverVersion(); | 48 | const u32 version = device.GetDriverVersion(); |
| @@ -216,7 +59,6 @@ std::string GetDriverVersion(const VKDevice& device) { | |||
| 216 | const u32 minor = version & 0x3fff; | 59 | const u32 minor = version & 0x3fff; |
| 217 | return fmt::format("{}.{}", major, minor); | 60 | return fmt::format("{}.{}", major, minor); |
| 218 | } | 61 | } |
| 219 | |||
| 220 | return GetReadableVersion(version); | 62 | return GetReadableVersion(version); |
| 221 | } | 63 | } |
| 222 | 64 | ||
| @@ -255,7 +97,6 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 255 | if (!framebuffer) { | 97 | if (!framebuffer) { |
| 256 | return; | 98 | return; |
| 257 | } | 99 | } |
| 258 | |||
| 259 | const auto& layout = render_window.GetFramebufferLayout(); | 100 | const auto& layout = render_window.GetFramebufferLayout(); |
| 260 | if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { | 101 | if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { |
| 261 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; | 102 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; |
| @@ -284,14 +125,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 284 | render_window.OnFrameDisplayed(); | 125 | render_window.OnFrameDisplayed(); |
| 285 | } | 126 | } |
| 286 | 127 | ||
| 287 | bool RendererVulkan::Init() { | 128 | bool RendererVulkan::Init() try { |
| 288 | library = OpenVulkanLibrary(); | 129 | library = OpenLibrary(); |
| 289 | std::tie(instance, instance_version) = CreateInstance( | 130 | instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, |
| 290 | library, dld, render_window.GetWindowInfo().type, Settings::values.renderer_debug); | 131 | true, Settings::values.renderer_debug); |
| 291 | if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { | 132 | if (Settings::values.renderer_debug) { |
| 292 | return false; | 133 | debug_callback = CreateDebugCallback(instance); |
| 293 | } | 134 | } |
| 135 | surface = CreateSurface(instance, render_window); | ||
| 294 | 136 | ||
| 137 | InitializeDevice(); | ||
| 295 | Report(); | 138 | Report(); |
| 296 | 139 | ||
| 297 | memory_manager = std::make_unique<VKMemoryManager>(*device); | 140 | memory_manager = std::make_unique<VKMemoryManager>(*device); |
| @@ -311,8 +154,11 @@ bool RendererVulkan::Init() { | |||
| 311 | blit_screen = | 154 | blit_screen = |
| 312 | std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device, | 155 | std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device, |
| 313 | *memory_manager, *swapchain, *scheduler, screen_info); | 156 | *memory_manager, *swapchain, *scheduler, screen_info); |
| 314 | |||
| 315 | return true; | 157 | return true; |
| 158 | |||
| 159 | } catch (const vk::Exception& exception) { | ||
| 160 | LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); | ||
| 161 | return false; | ||
| 316 | } | 162 | } |
| 317 | 163 | ||
| 318 | void RendererVulkan::ShutDown() { | 164 | void RendererVulkan::ShutDown() { |
| @@ -322,7 +168,6 @@ void RendererVulkan::ShutDown() { | |||
| 322 | if (const auto& dev = device->GetLogical()) { | 168 | if (const auto& dev = device->GetLogical()) { |
| 323 | dev.WaitIdle(); | 169 | dev.WaitIdle(); |
| 324 | } | 170 | } |
| 325 | |||
| 326 | rasterizer.reset(); | 171 | rasterizer.reset(); |
| 327 | blit_screen.reset(); | 172 | blit_screen.reset(); |
| 328 | scheduler.reset(); | 173 | scheduler.reset(); |
| @@ -331,95 +176,15 @@ void RendererVulkan::ShutDown() { | |||
| 331 | device.reset(); | 176 | device.reset(); |
| 332 | } | 177 | } |
| 333 | 178 | ||
| 334 | bool RendererVulkan::CreateDebugCallback() { | 179 | void RendererVulkan::InitializeDevice() { |
| 335 | if (!Settings::values.renderer_debug) { | 180 | const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); |
| 336 | return true; | ||
| 337 | } | ||
| 338 | debug_callback = instance.TryCreateDebugCallback(DebugCallback); | ||
| 339 | if (!debug_callback) { | ||
| 340 | LOG_ERROR(Render_Vulkan, "Failed to create debug callback"); | ||
| 341 | return false; | ||
| 342 | } | ||
| 343 | return true; | ||
| 344 | } | ||
| 345 | |||
| 346 | bool RendererVulkan::CreateSurface() { | ||
| 347 | [[maybe_unused]] const auto& window_info = render_window.GetWindowInfo(); | ||
| 348 | VkSurfaceKHR unsafe_surface = nullptr; | ||
| 349 | |||
| 350 | #ifdef _WIN32 | ||
| 351 | if (window_info.type == Core::Frontend::WindowSystemType::Windows) { | ||
| 352 | const HWND hWnd = static_cast<HWND>(window_info.render_surface); | ||
| 353 | const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, | ||
| 354 | nullptr, 0, nullptr, hWnd}; | ||
| 355 | const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>( | ||
| 356 | dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR")); | ||
| 357 | if (!vkCreateWin32SurfaceKHR || | ||
| 358 | vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { | ||
| 359 | LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface"); | ||
| 360 | return false; | ||
| 361 | } | ||
| 362 | } | ||
| 363 | #endif | ||
| 364 | #if !defined(_WIN32) && !defined(__APPLE__) | ||
| 365 | if (window_info.type == Core::Frontend::WindowSystemType::X11) { | ||
| 366 | const VkXlibSurfaceCreateInfoKHR xlib_ci{ | ||
| 367 | VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, | ||
| 368 | static_cast<Display*>(window_info.display_connection), | ||
| 369 | reinterpret_cast<Window>(window_info.render_surface)}; | ||
| 370 | const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>( | ||
| 371 | dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR")); | ||
| 372 | if (!vkCreateXlibSurfaceKHR || | ||
| 373 | vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { | ||
| 374 | LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); | ||
| 375 | return false; | ||
| 376 | } | ||
| 377 | } | ||
| 378 | if (window_info.type == Core::Frontend::WindowSystemType::Wayland) { | ||
| 379 | const VkWaylandSurfaceCreateInfoKHR wayland_ci{ | ||
| 380 | VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0, | ||
| 381 | static_cast<wl_display*>(window_info.display_connection), | ||
| 382 | static_cast<wl_surface*>(window_info.render_surface)}; | ||
| 383 | const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>( | ||
| 384 | dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR")); | ||
| 385 | if (!vkCreateWaylandSurfaceKHR || | ||
| 386 | vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) != | ||
| 387 | VK_SUCCESS) { | ||
| 388 | LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); | ||
| 389 | return false; | ||
| 390 | } | ||
| 391 | } | ||
| 392 | #endif | ||
| 393 | if (!unsafe_surface) { | ||
| 394 | LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); | ||
| 395 | return false; | ||
| 396 | } | ||
| 397 | |||
| 398 | surface = vk::SurfaceKHR(unsafe_surface, *instance, dld); | ||
| 399 | return true; | ||
| 400 | } | ||
| 401 | |||
| 402 | bool RendererVulkan::PickDevices() { | ||
| 403 | const auto devices = instance.EnumeratePhysicalDevices(); | ||
| 404 | if (!devices) { | ||
| 405 | LOG_ERROR(Render_Vulkan, "Failed to enumerate physical devices"); | ||
| 406 | return false; | ||
| 407 | } | ||
| 408 | |||
| 409 | const s32 device_index = Settings::values.vulkan_device.GetValue(); | 181 | const s32 device_index = Settings::values.vulkan_device.GetValue(); |
| 410 | if (device_index < 0 || device_index >= static_cast<s32>(devices->size())) { | 182 | if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { |
| 411 | LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); | 183 | LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); |
| 412 | return false; | 184 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); |
| 413 | } | ||
| 414 | const vk::PhysicalDevice physical_device((*devices)[static_cast<std::size_t>(device_index)], | ||
| 415 | dld); | ||
| 416 | if (!VKDevice::IsSuitable(physical_device, *surface)) { | ||
| 417 | return false; | ||
| 418 | } | 185 | } |
| 419 | 186 | const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld); | |
| 420 | device = | 187 | device = std::make_unique<Device>(*instance, physical_device, *surface, dld); |
| 421 | std::make_unique<VKDevice>(*instance, instance_version, physical_device, *surface, dld); | ||
| 422 | return device->Create(); | ||
| 423 | } | 188 | } |
| 424 | 189 | ||
| 425 | void RendererVulkan::Report() const { | 190 | void RendererVulkan::Report() const { |
| @@ -444,25 +209,21 @@ void RendererVulkan::Report() const { | |||
| 444 | telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); | 209 | telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); |
| 445 | } | 210 | } |
| 446 | 211 | ||
| 447 | std::vector<std::string> RendererVulkan::EnumerateDevices() { | 212 | std::vector<std::string> RendererVulkan::EnumerateDevices() try { |
| 448 | vk::InstanceDispatch dld; | 213 | vk::InstanceDispatch dld; |
| 449 | Common::DynamicLibrary library = OpenVulkanLibrary(); | 214 | const Common::DynamicLibrary library = OpenLibrary(); |
| 450 | vk::Instance instance = CreateInstance(library, dld).first; | 215 | const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0); |
| 451 | if (!instance) { | 216 | const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices(); |
| 452 | return {}; | ||
| 453 | } | ||
| 454 | |||
| 455 | const std::optional physical_devices = instance.EnumeratePhysicalDevices(); | ||
| 456 | if (!physical_devices) { | ||
| 457 | return {}; | ||
| 458 | } | ||
| 459 | |||
| 460 | std::vector<std::string> names; | 217 | std::vector<std::string> names; |
| 461 | names.reserve(physical_devices->size()); | 218 | names.reserve(physical_devices.size()); |
| 462 | for (const auto& device : *physical_devices) { | 219 | for (const VkPhysicalDevice device : physical_devices) { |
| 463 | names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName); | 220 | names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName); |
| 464 | } | 221 | } |
| 465 | return names; | 222 | return names; |
| 223 | |||
| 224 | } catch (const vk::Exception& exception) { | ||
| 225 | LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what()); | ||
| 226 | return {}; | ||
| 466 | } | 227 | } |
| 467 | 228 | ||
| 468 | } // namespace Vulkan | 229 | } // namespace Vulkan |
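The Init() rewrite above switches from boolean-returning helpers to a function-try-block: the try spans the entire function body, so every throwing call funnels into a single catch that converts the exception back into the old boolean contract (EnumerateDevices() gets the same treatment, returning an empty vector). A standalone sketch of the idiom, using std::runtime_error as a stand-in for vk::Exception and illustrative names:

    #include <cstdio>
    #include <stdexcept>

    bool Initialize(bool simulate_failure) try {
        if (simulate_failure) {
            // Stands in for any throwing call inside the real Init() body.
            throw std::runtime_error("VK_ERROR_INITIALIZATION_FAILED");
        }
        return true;
    } catch (const std::exception& exception) {
        std::fprintf(stderr, "Vulkan initialization failed with error: %s\n", exception.what());
        return false;
    }

One caveat worth knowing: on a constructor, a function-try-block handler cannot swallow the exception (falling off the end of the handler rethrows, and returning a value is ill-formed), but on an ordinary function like Init() returning false from the handler is well-formed.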
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 977b86003..5575ffc54 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | #include "common/dynamic_library.h" | 11 | #include "common/dynamic_library.h" |
| 12 | 12 | ||
| 13 | #include "video_core/renderer_base.h" | 13 | #include "video_core/renderer_base.h" |
| 14 | #include "video_core/renderer_vulkan/wrapper.h" | 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 15 | 15 | ||
| 16 | namespace Core { | 16 | namespace Core { |
| 17 | class TelemetrySession; | 17 | class TelemetrySession; |
| @@ -27,16 +27,15 @@ class GPU; | |||
| 27 | 27 | ||
| 28 | namespace Vulkan { | 28 | namespace Vulkan { |
| 29 | 29 | ||
| 30 | class Device; | ||
| 30 | class StateTracker; | 31 | class StateTracker; |
| 31 | class VKBlitScreen; | 32 | class VKBlitScreen; |
| 32 | class VKDevice; | ||
| 33 | class VKMemoryManager; | 33 | class VKMemoryManager; |
| 34 | class VKSwapchain; | 34 | class VKSwapchain; |
| 35 | class VKScheduler; | 35 | class VKScheduler; |
| 36 | class VKImage; | ||
| 37 | 36 | ||
| 38 | struct VKScreenInfo { | 37 | struct VKScreenInfo { |
| 39 | VKImage* image{}; | 38 | VkImageView image_view{}; |
| 40 | u32 width{}; | 39 | u32 width{}; |
| 41 | u32 height{}; | 40 | u32 height{}; |
| 42 | bool is_srgb{}; | 41 | bool is_srgb{}; |
| @@ -57,11 +56,7 @@ public: | |||
| 57 | static std::vector<std::string> EnumerateDevices(); | 56 | static std::vector<std::string> EnumerateDevices(); |
| 58 | 57 | ||
| 59 | private: | 58 | private: |
| 60 | bool CreateDebugCallback(); | 59 | void InitializeDevice(); |
| 61 | |||
| 62 | bool CreateSurface(); | ||
| 63 | |||
| 64 | bool PickDevices(); | ||
| 65 | 60 | ||
| 66 | void Report() const; | 61 | void Report() const; |
| 67 | 62 | ||
| @@ -73,14 +68,13 @@ private: | |||
| 73 | vk::InstanceDispatch dld; | 68 | vk::InstanceDispatch dld; |
| 74 | 69 | ||
| 75 | vk::Instance instance; | 70 | vk::Instance instance; |
| 76 | u32 instance_version{}; | ||
| 77 | 71 | ||
| 78 | vk::SurfaceKHR surface; | 72 | vk::SurfaceKHR surface; |
| 79 | 73 | ||
| 80 | VKScreenInfo screen_info; | 74 | VKScreenInfo screen_info; |
| 81 | 75 | ||
| 82 | vk::DebugCallback debug_callback; | 76 | vk::DebugUtilsMessenger debug_callback; |
| 83 | std::unique_ptr<VKDevice> device; | 77 | std::unique_ptr<Device> device; |
| 84 | std::unique_ptr<VKMemoryManager> memory_manager; | 78 | std::unique_ptr<VKMemoryManager> memory_manager; |
| 85 | std::unique_ptr<StateTracker> state_tracker; | 79 | std::unique_ptr<StateTracker> state_tracker; |
| 86 | std::unique_ptr<VKScheduler> scheduler; | 80 | std::unique_ptr<VKScheduler> scheduler; |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index b5b60309e..5e184eb42 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp | |||
| @@ -16,121 +16,25 @@ | |||
| 16 | #include "core/frontend/emu_window.h" | 16 | #include "core/frontend/emu_window.h" |
| 17 | #include "core/memory.h" | 17 | #include "core/memory.h" |
| 18 | #include "video_core/gpu.h" | 18 | #include "video_core/gpu.h" |
| 19 | #include "video_core/morton.h" | 19 | #include "video_core/host_shaders/vulkan_present_frag_spv.h" |
| 20 | #include "video_core/host_shaders/vulkan_present_vert_spv.h" | ||
| 20 | #include "video_core/rasterizer_interface.h" | 21 | #include "video_core/rasterizer_interface.h" |
| 21 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 22 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 22 | #include "video_core/renderer_vulkan/vk_blit_screen.h" | 23 | #include "video_core/renderer_vulkan/vk_blit_screen.h" |
| 23 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 24 | #include "video_core/renderer_vulkan/vk_image.h" | ||
| 25 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | 24 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" |
| 26 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 25 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 27 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 26 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 28 | #include "video_core/renderer_vulkan/vk_shader_util.h" | 27 | #include "video_core/renderer_vulkan/vk_shader_util.h" |
| 29 | #include "video_core/renderer_vulkan/vk_swapchain.h" | 28 | #include "video_core/renderer_vulkan/vk_swapchain.h" |
| 30 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 31 | #include "video_core/surface.h" | 29 | #include "video_core/surface.h" |
| 30 | #include "video_core/textures/decoders.h" | ||
| 31 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 32 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 32 | 33 | ||
| 33 | namespace Vulkan { | 34 | namespace Vulkan { |
| 34 | 35 | ||
| 35 | namespace { | 36 | namespace { |
| 36 | 37 | ||
| 37 | // Generated from the "shaders/" directory, read the instructions there. | ||
| 38 | constexpr u8 blit_vertex_code[] = { | ||
| 39 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x27, 0x00, 0x00, 0x00, | ||
| 40 | 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, | ||
| 41 | 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, | ||
| 42 | 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 43 | 0x0f, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, | ||
| 44 | 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, | ||
| 45 | 0x25, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 46 | 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 47 | 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, | ||
| 48 | 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 49 | 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 50 | 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 51 | 0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, | ||
| 52 | 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 53 | 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 54 | 0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00, | ||
| 55 | 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 56 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, | ||
| 57 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 58 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 59 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 60 | 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, | ||
| 61 | 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 62 | 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 63 | 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 64 | 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 65 | 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 66 | 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 67 | 0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 68 | 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, | ||
| 69 | 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, | ||
| 70 | 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, | ||
| 71 | 0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00, | ||
| 72 | 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, | ||
| 73 | 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, | ||
| 74 | 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, | ||
| 75 | 0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, | ||
| 76 | 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, | ||
| 77 | 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, | ||
| 78 | 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, | ||
| 79 | 0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 80 | 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 81 | 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, | ||
| 82 | 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 83 | 0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 84 | 0x24, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, | ||
| 85 | 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 86 | 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, | ||
| 87 | 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 88 | 0x13, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, | ||
| 89 | 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, | ||
| 90 | 0x1a, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 91 | 0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, | ||
| 92 | 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 93 | 0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, | ||
| 94 | 0x1e, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00, | ||
| 95 | 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, | ||
| 96 | 0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 97 | 0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 98 | 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||
| 99 | 0x3e, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, | ||
| 100 | 0x38, 0x00, 0x01, 0x00}; | ||
| 101 | |||
| 102 | constexpr u8 blit_fragment_code[] = { | ||
| 103 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00, | ||
| 104 | 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, | ||
| 105 | 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, | ||
| 106 | 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 107 | 0x0f, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, | ||
| 108 | 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, | ||
| 109 | 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 110 | 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 111 | 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 112 | 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, | ||
| 113 | 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 114 | 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, | ||
| 115 | 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 116 | 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, | ||
| 117 | 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, | ||
| 118 | 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x00, | ||
| 119 | 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 120 | 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x03, 0x00, | ||
| 121 | 0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, | ||
| 122 | 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, | ||
| 123 | 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, | ||
| 124 | 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, | ||
| 125 | 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, | ||
| 126 | 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 127 | 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, | ||
| 128 | 0x05, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, | ||
| 129 | 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, | ||
| 130 | 0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, | ||
| 131 | 0x0e, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 132 | 0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; | ||
| 133 | |||
| 134 | struct ScreenRectVertex { | 38 | struct ScreenRectVertex { |
| 135 | ScreenRectVertex() = default; | 39 | ScreenRectVertex() = default; |
| 136 | explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} | 40 | explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} |
| @@ -173,9 +77,9 @@ constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) { | |||
| 173 | // clang-format on | 77 | // clang-format on |
| 174 | } | 78 | } |
| 175 | 79 | ||
| 176 | std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { | 80 | u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { |
| 177 | using namespace VideoCore::Surface; | 81 | using namespace VideoCore::Surface; |
| 178 | return GetBytesPerPixel(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); | 82 | return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); |
| 179 | } | 83 | } |
| 180 | 84 | ||
| 181 | std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { | 85 | std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { |
| @@ -210,7 +114,7 @@ struct VKBlitScreen::BufferData { | |||
| 210 | 114 | ||
| 211 | VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, | 115 | VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, |
| 212 | Core::Frontend::EmuWindow& render_window_, | 116 | Core::Frontend::EmuWindow& render_window_, |
| 213 | VideoCore::RasterizerInterface& rasterizer_, const VKDevice& device_, | 117 | VideoCore::RasterizerInterface& rasterizer_, const Device& device_, |
| 214 | VKMemoryManager& memory_manager_, VKSwapchain& swapchain_, | 118 | VKMemoryManager& memory_manager_, VKSwapchain& swapchain_, |
| 215 | VKScheduler& scheduler_, const VKScreenInfo& screen_info_) | 119 | VKScheduler& scheduler_, const VKScreenInfo& screen_info_) |
| 216 | : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, | 120 | : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, |
| @@ -239,34 +143,30 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 239 | scheduler.Wait(resource_ticks[image_index]); | 143 | scheduler.Wait(resource_ticks[image_index]); |
| 240 | resource_ticks[image_index] = scheduler.CurrentTick(); | 144 | resource_ticks[image_index] = scheduler.CurrentTick(); |
| 241 | 145 | ||
| 242 | VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get(); | 146 | UpdateDescriptorSet(image_index, |
| 243 | 147 | use_accelerated ? screen_info.image_view : *raw_image_views[image_index]); | |
| 244 | UpdateDescriptorSet(image_index, blit_image->GetPresentView()); | ||
| 245 | 148 | ||
| 246 | BufferData data; | 149 | BufferData data; |
| 247 | SetUniformData(data, framebuffer); | 150 | SetUniformData(data, framebuffer); |
| 248 | SetVertexData(data, framebuffer); | 151 | SetVertexData(data, framebuffer); |
| 249 | 152 | ||
| 250 | auto map = buffer_commit->Map(); | 153 | auto map = buffer_commit->Map(); |
| 251 | std::memcpy(map.GetAddress(), &data, sizeof(data)); | 154 | std::memcpy(map.Address(), &data, sizeof(data)); |
| 252 | 155 | ||
| 253 | if (!use_accelerated) { | 156 | if (!use_accelerated) { |
| 254 | const u64 image_offset = GetRawImageOffset(framebuffer, image_index); | 157 | const u64 image_offset = GetRawImageOffset(framebuffer, image_index); |
| 255 | 158 | ||
| 256 | const auto pixel_format = | ||
| 257 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); | ||
| 258 | const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; | 159 | const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; |
| 259 | const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr); | 160 | const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); |
| 260 | rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer)); | 161 | const size_t size_bytes = GetSizeInBytes(framebuffer); |
| 162 | rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes); | ||
| 261 | 163 | ||
| 262 | // TODO(Rodrigo): Read this from HLE | 164 | // TODO(Rodrigo): Read this from HLE |
| 263 | constexpr u32 block_height_log2 = 4; | 165 | constexpr u32 block_height_log2 = 4; |
| 264 | VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, | 166 | const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); |
| 265 | framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, | 167 | Tegra::Texture::UnswizzleTexture( |
| 266 | map.GetAddress() + image_offset, host_ptr); | 168 | std::span(map.Address() + image_offset, size_bytes), std::span(host_ptr, size_bytes), |
| 267 | 169 | bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); | |
| 268 | blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 269 | VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); | ||
| 270 | 170 | ||
| 271 | const VkBufferImageCopy copy{ | 171 | const VkBufferImageCopy copy{ |
| 272 | .bufferOffset = image_offset, | 172 | .bufferOffset = image_offset, |
| @@ -288,15 +188,44 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 288 | }, | 188 | }, |
| 289 | }; | 189 | }; |
| 290 | scheduler.Record( | 190 | scheduler.Record( |
| 291 | [buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) { | 191 | [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { |
| 292 | cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); | 192 | const VkImageMemoryBarrier base_barrier{ |
| 193 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 194 | .pNext = nullptr, | ||
| 195 | .srcAccessMask = 0, | ||
| 196 | .dstAccessMask = 0, | ||
| 197 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 198 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 199 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 200 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 201 | .image = image, | ||
| 202 | .subresourceRange = | ||
| 203 | { | ||
| 204 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 205 | .baseMipLevel = 0, | ||
| 206 | .levelCount = 1, | ||
| 207 | .baseArrayLayer = 0, | ||
| 208 | .layerCount = 1, | ||
| 209 | }, | ||
| 210 | }; | ||
| 211 | VkImageMemoryBarrier read_barrier = base_barrier; | ||
| 212 | read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; | ||
| 213 | read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | ||
| 214 | read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; | ||
| 215 | |||
| 216 | VkImageMemoryBarrier write_barrier = base_barrier; | ||
| 217 | write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | ||
| 218 | write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; | ||
| 219 | |||
| 220 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 221 | 0, read_barrier); | ||
| 222 | cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); | ||
| 223 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 224 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); | ||
| 293 | }); | 225 | }); |
| 294 | } | 226 | } |
| 295 | map.Release(); | 227 | map.Release(); |
| 296 | 228 | ||
| 297 | blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, | ||
| 298 | VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); | ||
| 299 | |||
| 300 | scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], | 229 | scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], |
| 301 | descriptor_set = descriptor_sets[image_index], buffer = *buffer, | 230 | descriptor_set = descriptor_sets[image_index], buffer = *buffer, |
| 302 | size = swapchain.GetSize(), pipeline = *pipeline, | 231 | size = swapchain.GetSize(), pipeline = *pipeline, |
| @@ -304,31 +233,31 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 304 | const VkClearValue clear_color{ | 233 | const VkClearValue clear_color{ |
| 305 | .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}}, | 234 | .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}}, |
| 306 | }; | 235 | }; |
| 307 | 236 | const VkRenderPassBeginInfo renderpass_bi{ | |
| 308 | VkRenderPassBeginInfo renderpass_bi; | 237 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, |
| 309 | renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; | 238 | .pNext = nullptr, |
| 310 | renderpass_bi.pNext = nullptr; | 239 | .renderPass = renderpass, |
| 311 | renderpass_bi.renderPass = renderpass; | 240 | .framebuffer = framebuffer, |
| 312 | renderpass_bi.framebuffer = framebuffer; | 241 | .renderArea = |
| 313 | renderpass_bi.renderArea.offset.x = 0; | 242 | { |
| 314 | renderpass_bi.renderArea.offset.y = 0; | 243 | .offset = {0, 0}, |
| 315 | renderpass_bi.renderArea.extent = size; | 244 | .extent = size, |
| 316 | renderpass_bi.clearValueCount = 1; | 245 | }, |
| 317 | renderpass_bi.pClearValues = &clear_color; | 246 | .clearValueCount = 1, |
| 318 | 247 | .pClearValues = &clear_color, | |
| 319 | VkViewport viewport; | 248 | }; |
| 320 | viewport.x = 0.0f; | 249 | const VkViewport viewport{ |
| 321 | viewport.y = 0.0f; | 250 | .x = 0.0f, |
| 322 | viewport.width = static_cast<float>(size.width); | 251 | .y = 0.0f, |
| 323 | viewport.height = static_cast<float>(size.height); | 252 | .width = static_cast<float>(size.width), |
| 324 | viewport.minDepth = 0.0f; | 253 | .height = static_cast<float>(size.height), |
| 325 | viewport.maxDepth = 1.0f; | 254 | .minDepth = 0.0f, |
| 326 | 255 | .maxDepth = 1.0f, | |
| 327 | VkRect2D scissor; | 256 | }; |
| 328 | scissor.offset.x = 0; | 257 | const VkRect2D scissor{ |
| 329 | scissor.offset.y = 0; | 258 | .offset = {0, 0}, |
| 330 | scissor.extent = size; | 259 | .extent = size, |
| 331 | 260 | }; | |
| 332 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); | 261 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); |
| 333 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | 262 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); |
| 334 | cmdbuf.SetViewport(0, viewport); | 263 | cmdbuf.SetViewport(0, viewport); |
| @@ -372,8 +301,8 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) | |||
| 372 | } | 301 | } |
| 373 | 302 | ||
| 374 | void VKBlitScreen::CreateShaders() { | 303 | void VKBlitScreen::CreateShaders() { |
| 375 | vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code); | 304 | vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); |
| 376 | fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code); | 305 | fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); |
| 377 | } | 306 | } |
| 378 | 307 | ||
| 379 | void VKBlitScreen::CreateSemaphores() { | 308 | void VKBlitScreen::CreateSemaphores() { |
| @@ -420,7 +349,7 @@ void VKBlitScreen::CreateRenderPass() { | |||
| 420 | 349 | ||
| 421 | const VkAttachmentReference color_attachment_ref{ | 350 | const VkAttachmentReference color_attachment_ref{ |
| 422 | .attachment = 0, | 351 | .attachment = 0, |
| 423 | .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, | 352 | .layout = VK_IMAGE_LAYOUT_GENERAL, |
| 424 | }; | 353 | }; |
| 425 | 354 | ||
| 426 | const VkSubpassDescription subpass_description{ | 355 | const VkSubpassDescription subpass_description{ |
| @@ -735,34 +664,56 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff | |||
| 735 | 664 | ||
| 736 | void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { | 665 | void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { |
| 737 | raw_images.resize(image_count); | 666 | raw_images.resize(image_count); |
| 667 | raw_image_views.resize(image_count); | ||
| 738 | raw_buffer_commits.resize(image_count); | 668 | raw_buffer_commits.resize(image_count); |
| 739 | 669 | ||
| 740 | const VkImageCreateInfo ci{ | 670 | for (size_t i = 0; i < image_count; ++i) { |
| 741 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | 671 | raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ |
| 742 | .pNext = nullptr, | 672 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, |
| 743 | .flags = 0, | 673 | .pNext = nullptr, |
| 744 | .imageType = VK_IMAGE_TYPE_2D, | 674 | .flags = 0, |
| 745 | .format = GetFormat(framebuffer), | 675 | .imageType = VK_IMAGE_TYPE_2D, |
| 746 | .extent = | 676 | .format = GetFormat(framebuffer), |
| 747 | { | 677 | .extent = |
| 748 | .width = framebuffer.width, | 678 | { |
| 749 | .height = framebuffer.height, | 679 | .width = framebuffer.width, |
| 750 | .depth = 1, | 680 | .height = framebuffer.height, |
| 751 | }, | 681 | .depth = 1, |
| 752 | .mipLevels = 1, | 682 | }, |
| 753 | .arrayLayers = 1, | 683 | .mipLevels = 1, |
| 754 | .samples = VK_SAMPLE_COUNT_1_BIT, | 684 | .arrayLayers = 1, |
| 755 | .tiling = VK_IMAGE_TILING_LINEAR, | 685 | .samples = VK_SAMPLE_COUNT_1_BIT, |
| 756 | .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, | 686 | .tiling = VK_IMAGE_TILING_LINEAR, |
| 757 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 687 | .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, |
| 758 | .queueFamilyIndexCount = 0, | 688 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 759 | .pQueueFamilyIndices = nullptr, | 689 | .queueFamilyIndexCount = 0, |
| 760 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | 690 | .pQueueFamilyIndices = nullptr, |
| 761 | }; | 691 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| 762 | 692 | }); | |
| 763 | for (std::size_t i = 0; i < image_count; ++i) { | 693 | raw_buffer_commits[i] = memory_manager.Commit(raw_images[i], false); |
| 764 | raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT); | 694 | raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ |
| 765 | raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false); | 695 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| 696 | .pNext = nullptr, | ||
| 697 | .flags = 0, | ||
| 698 | .image = *raw_images[i], | ||
| 699 | .viewType = VK_IMAGE_VIEW_TYPE_2D, | ||
| 700 | .format = GetFormat(framebuffer), | ||
| 701 | .components = | ||
| 702 | { | ||
| 703 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 704 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 705 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 706 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 707 | }, | ||
| 708 | .subresourceRange = | ||
| 709 | { | ||
| 710 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 711 | .baseMipLevel = 0, | ||
| 712 | .levelCount = 1, | ||
| 713 | .baseArrayLayer = 0, | ||
| 714 | .layerCount = 1, | ||
| 715 | }, | ||
| 716 | }); | ||
| 766 | } | 717 | } |
| 767 | } | 718 | } |
| 768 | 719 | ||
| @@ -789,7 +740,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag | |||
| 789 | const VkDescriptorImageInfo image_info{ | 740 | const VkDescriptorImageInfo image_info{ |
| 790 | .sampler = *sampler, | 741 | .sampler = *sampler, |
| 791 | .imageView = image_view, | 742 | .imageView = image_view, |
| 792 | .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, | 743 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 793 | }; | 744 | }; |
| 794 | 745 | ||
| 795 | const VkWriteDescriptorSet sampler_write{ | 746 | const VkWriteDescriptorSet sampler_write{ |
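With the VKImage layout tracker deleted, the upload path above synchronizes explicitly and keeps the raw image in VK_IMAGE_LAYOUT_GENERAL for its whole lifetime. A condensed sketch of the recorded sequence; RecordRawImageUpload is a hypothetical helper, whereas the real lambda derives both barriers from the shared base_barrier shown in the diff:

    void RecordRawImageUpload(vk::CommandBuffer cmdbuf, VkBuffer buffer, VkImage image,
                              const VkBufferImageCopy& copy, VkImageMemoryBarrier barrier) {
        // 1. Make the CPU staging writes visible to the copy and discard any
        //    stale image contents (UNDEFINED -> GENERAL).
        barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
        barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                               barrier);

        cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);

        // 2. Make the copied texels visible to the present shader; the layout
        //    stays GENERAL, so only the access masks change.
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
                               VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, barrier);
    }

Trading the optimal layouts (SHADER_READ_ONLY_OPTIMAL, COLOR_ATTACHMENT_OPTIMAL) for GENERAL can cost a little sampling performance on some hardware, but it removes the need for per-image layout state now that VKImage is gone; the render pass attachment reference and the descriptor image info above switch to GENERAL for the same reason.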
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 8f2839214..69ed61770 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | 8 | ||
| 9 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 9 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 10 | #include "video_core/renderer_vulkan/wrapper.h" | 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 11 | 11 | ||
| 12 | namespace Core { | 12 | namespace Core { |
| 13 | class System; | 13 | class System; |
| @@ -33,9 +33,8 @@ namespace Vulkan { | |||
| 33 | 33 | ||
| 34 | struct ScreenInfo; | 34 | struct ScreenInfo; |
| 35 | 35 | ||
| 36 | class Device; | ||
| 36 | class RasterizerVulkan; | 37 | class RasterizerVulkan; |
| 37 | class VKDevice; | ||
| 38 | class VKImage; | ||
| 39 | class VKScheduler; | 38 | class VKScheduler; |
| 40 | class VKSwapchain; | 39 | class VKSwapchain; |
| 41 | 40 | ||
| @@ -43,7 +42,7 @@ class VKBlitScreen final { | |||
| 43 | public: | 42 | public: |
| 44 | explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, | 43 | explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, |
| 45 | Core::Frontend::EmuWindow& render_window, | 44 | Core::Frontend::EmuWindow& render_window, |
| 46 | VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, | 45 | VideoCore::RasterizerInterface& rasterizer, const Device& device, |
| 47 | VKMemoryManager& memory_manager, VKSwapchain& swapchain, | 46 | VKMemoryManager& memory_manager, VKSwapchain& swapchain, |
| 48 | VKScheduler& scheduler, const VKScreenInfo& screen_info); | 47 | VKScheduler& scheduler, const VKScreenInfo& screen_info); |
| 49 | ~VKBlitScreen(); | 48 | ~VKBlitScreen(); |
| @@ -86,7 +85,7 @@ private: | |||
| 86 | Core::Memory::Memory& cpu_memory; | 85 | Core::Memory::Memory& cpu_memory; |
| 87 | Core::Frontend::EmuWindow& render_window; | 86 | Core::Frontend::EmuWindow& render_window; |
| 88 | VideoCore::RasterizerInterface& rasterizer; | 87 | VideoCore::RasterizerInterface& rasterizer; |
| 89 | const VKDevice& device; | 88 | const Device& device; |
| 90 | VKMemoryManager& memory_manager; | 89 | VKMemoryManager& memory_manager; |
| 91 | VKSwapchain& swapchain; | 90 | VKSwapchain& swapchain; |
| 92 | VKScheduler& scheduler; | 91 | VKScheduler& scheduler; |
| @@ -110,7 +109,8 @@ private: | |||
| 110 | std::vector<u64> resource_ticks; | 109 | std::vector<u64> resource_ticks; |
| 111 | 110 | ||
| 112 | std::vector<vk::Semaphore> semaphores; | 111 | std::vector<vk::Semaphore> semaphores; |
| 113 | std::vector<std::unique_ptr<VKImage>> raw_images; | 112 | std::vector<vk::Image> raw_images; |
| 113 | std::vector<vk::ImageView> raw_image_views; | ||
| 114 | std::vector<VKMemoryCommit> raw_buffer_commits; | 114 | std::vector<VKMemoryCommit> raw_buffer_commits; |
| 115 | u32 raw_width = 0; | 115 | u32 raw_width = 0; |
| 116 | u32 raw_height = 0; | 116 | u32 raw_height = 0; |
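The header change above replaces heap-allocated VKImage objects with value-stored RAII handles, splitting views into their own vector. An illustrative move-only wrapper in the spirit of those vk:: handles — the project's real wrapper in vulkan_wrapper.h carries more state, so this only shows why the unique_ptr indirection becomes unnecessary:

#include <utility>
#include <vulkan/vulkan.h>

class ImageHandle {
public:
    ImageHandle() = default;
    ImageHandle(VkDevice device_, VkImage handle_) : device{device_}, handle{handle_} {}

    ~ImageHandle() {
        // Owning destructor: a vector of these destroys its images in place.
        if (handle != VK_NULL_HANDLE) {
            vkDestroyImage(device, handle, nullptr);
        }
    }

    // Move-only: transferring ownership leaves the source empty.
    ImageHandle(ImageHandle&& rhs) noexcept
        : device{rhs.device}, handle{std::exchange(rhs.handle, VK_NULL_HANDLE)} {}
    ImageHandle& operator=(ImageHandle&& rhs) noexcept {
        std::swap(device, rhs.device);
        std::swap(handle, rhs.handle);
        return *this;
    }
    ImageHandle(const ImageHandle&) = delete;
    ImageHandle& operator=(const ImageHandle&) = delete;

    VkImage operator*() const { return handle; }

private:
    VkDevice device = VK_NULL_HANDLE;
    VkImage handle = VK_NULL_HANDLE;
};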
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 444d3fb93..4d517c547 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -9,10 +9,10 @@ | |||
| 9 | #include "core/core.h" | 9 | #include "core/core.h" |
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | 10 | #include "video_core/buffer_cache/buffer_cache.h" |
| 11 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 11 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 12 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 12 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 14 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | 13 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" |
| 15 | #include "video_core/renderer_vulkan/wrapper.h" | 14 | #include "video_core/vulkan_common/vulkan_device.h" |
| 15 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 16 | 16 | ||
| 17 | namespace Vulkan { | 17 | namespace Vulkan { |
| 18 | 18 | ||
| @@ -31,15 +31,19 @@ constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS = | |||
| 31 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | | 31 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | |
| 32 | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; | 32 | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; |
| 33 | 33 | ||
| 34 | std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { | 34 | constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = |
| 35 | return std::make_unique<VKStreamBuffer>(device, scheduler, BUFFER_USAGE); | 35 | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; |
| 36 | |||
| 37 | std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const Device& device, VKScheduler& scheduler) { | ||
| 38 | return std::make_unique<VKStreamBuffer>(device, scheduler); | ||
| 36 | } | 39 | } |
| 37 | 40 | ||
| 38 | } // Anonymous namespace | 41 | } // Anonymous namespace |
| 39 | 42 | ||
| 40 | Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, | 43 | Buffer::Buffer(const Device& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_, |
| 41 | VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) | 44 | VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) |
| 42 | : BufferBlock{cpu_addr_, size_}, scheduler{scheduler_}, staging_pool{staging_pool_} { | 45 | : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ |
| 46 | staging_pool_} { | ||
| 43 | const VkBufferCreateInfo ci{ | 47 | const VkBufferCreateInfo ci{ |
| 44 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 48 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 45 | .pNext = nullptr, | 49 | .pNext = nullptr, |
| @@ -64,24 +68,39 @@ void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { | |||
| 64 | scheduler.RequestOutsideRenderPassOperationContext(); | 68 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 65 | 69 | ||
| 66 | const VkBuffer handle = Handle(); | 70 | const VkBuffer handle = Handle(); |
| 67 | scheduler.Record( | 71 | scheduler.Record([staging = *staging.handle, handle, offset, data_size, |
| 68 | [staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) { | 72 | &device = device](vk::CommandBuffer cmdbuf) { |
| 69 | cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); | 73 | const VkBufferMemoryBarrier read_barrier{ |
| 70 | 74 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | |
| 71 | const VkBufferMemoryBarrier barrier{ | 75 | .pNext = nullptr, |
| 72 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | 76 | .srcAccessMask = |
| 73 | .pNext = nullptr, | 77 | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | |
| 74 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 78 | VK_ACCESS_HOST_WRITE_BIT | |
| 75 | .dstAccessMask = UPLOAD_ACCESS_BARRIERS, | 79 | (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0), |
| 76 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 80 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, |
| 77 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 81 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 78 | .buffer = handle, | 82 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 79 | .offset = offset, | 83 | .buffer = handle, |
| 80 | .size = data_size, | 84 | .offset = offset, |
| 81 | }; | 85 | .size = data_size, |
| 82 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, | 86 | }; |
| 83 | barrier, {}); | 87 | const VkBufferMemoryBarrier write_barrier{ |
| 84 | }); | 88 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, |
| 89 | .pNext = nullptr, | ||
| 90 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 91 | .dstAccessMask = UPLOAD_ACCESS_BARRIERS, | ||
| 92 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 93 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 94 | .buffer = handle, | ||
| 95 | .offset = offset, | ||
| 96 | .size = data_size, | ||
| 97 | }; | ||
| 98 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 99 | 0, read_barrier); | ||
| 100 | cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); | ||
| 101 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, | ||
| 102 | write_barrier); | ||
| 103 | }); | ||
| 85 | } | 104 | } |
| 86 | 105 | ||
| 87 | void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { | 106 | void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { |
| @@ -149,9 +168,11 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst | |||
| 149 | 168 | ||
| 150 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, | 169 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, |
| 151 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 170 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, |
| 152 | const VKDevice& device_, VKMemoryManager& memory_manager_, | 171 | const Device& device_, VKMemoryManager& memory_manager_, |
| 153 | VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_) | 172 | VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_, |
| 154 | : BufferCache{rasterizer_, gpu_memory_, cpu_memory_, CreateStreamBuffer(device_, scheduler_)}, | 173 | VKStagingBufferPool& staging_pool_) |
| 174 | : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_, | ||
| 175 | cpu_memory_, stream_buffer_}, | ||
| 155 | device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ | 176 | device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ |
| 156 | staging_pool_} {} | 177 | staging_pool_} {} |
| 157 | 178 | ||
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 6008b8373..1c39aed34 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -11,17 +11,17 @@ | |||
| 11 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 11 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 12 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 12 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 13 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | 13 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" |
| 14 | #include "video_core/renderer_vulkan/wrapper.h" | 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 15 | 15 | ||
| 16 | namespace Vulkan { | 16 | namespace Vulkan { |
| 17 | 17 | ||
| 18 | class VKDevice; | 18 | class Device; |
| 19 | class VKMemoryManager; | 19 | class VKMemoryManager; |
| 20 | class VKScheduler; | 20 | class VKScheduler; |
| 21 | 21 | ||
| 22 | class Buffer final : public VideoCommon::BufferBlock { | 22 | class Buffer final : public VideoCommon::BufferBlock { |
| 23 | public: | 23 | public: |
| 24 | explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, | 24 | explicit Buffer(const Device& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, |
| 25 | VKStagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_); | 25 | VKStagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_); |
| 26 | ~Buffer(); | 26 | ~Buffer(); |
| 27 | 27 | ||
| @@ -41,6 +41,7 @@ public: | |||
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | private: | 43 | private: |
| 44 | const Device& device; | ||
| 44 | VKScheduler& scheduler; | 45 | VKScheduler& scheduler; |
| 45 | VKStagingBufferPool& staging_pool; | 46 | VKStagingBufferPool& staging_pool; |
| 46 | 47 | ||
| @@ -49,10 +50,11 @@ private: | |||
| 49 | 50 | ||
| 50 | class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { | 51 | class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { |
| 51 | public: | 52 | public: |
| 52 | explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, | 53 | explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, |
| 53 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 54 | Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, |
| 54 | const VKDevice& device_, VKMemoryManager& memory_manager_, | 55 | const Device& device, VKMemoryManager& memory_manager, |
| 55 | VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_); | 56 | VKScheduler& scheduler, VKStreamBuffer& stream_buffer, |
| 57 | VKStagingBufferPool& staging_pool); | ||
| 56 | ~VKBufferCache(); | 58 | ~VKBufferCache(); |
| 57 | 59 | ||
| 58 | BufferInfo GetEmptyBuffer(std::size_t size) override; | 60 | BufferInfo GetEmptyBuffer(std::size_t size) override; |
| @@ -61,7 +63,7 @@ protected: | |||
| 61 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; | 63 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; |
| 62 | 64 | ||
| 63 | private: | 65 | private: |
| 64 | const VKDevice& device; | 66 | const Device& device; |
| 65 | VKMemoryManager& memory_manager; | 67 | VKMemoryManager& memory_manager; |
| 66 | VKScheduler& scheduler; | 68 | VKScheduler& scheduler; |
| 67 | VKStagingBufferPool& staging_pool; | 69 | VKStagingBufferPool& staging_pool; |
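With the constructor change above, VKBufferCache no longer builds its own stream buffer; it borrows one owned by the caller, so a single ring can be shared across users. A toy, self-contained illustration of that ownership change — the names below are assumptions for the sketch, not the project's actual members:

#include <cstddef>
#include <vector>

struct StreamBuffer {
    std::vector<std::byte> ring; // stand-in for the host-visible ring
    explicit StreamBuffer(std::size_t size) : ring(size) {}
};

class BufferCache {
public:
    explicit BufferCache(StreamBuffer& stream_buffer_) : stream_buffer{stream_buffer_} {}

private:
    StreamBuffer& stream_buffer; // borrowed, not owned
};

int main() {
    StreamBuffer stream_buffer(4 << 20);    // created once by the owner
    BufferCache vertex_cache{stream_buffer};
    BufferCache index_cache{stream_buffer}; // same instance, no duplicate allocation
}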
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp index 8f7d6410e..a99df9323 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp | |||
| @@ -5,8 +5,8 @@ | |||
| 5 | #include <cstddef> | 5 | #include <cstddef> |
| 6 | 6 | ||
| 7 | #include "video_core/renderer_vulkan/vk_command_pool.h" | 7 | #include "video_core/renderer_vulkan/vk_command_pool.h" |
| 8 | #include "video_core/renderer_vulkan/vk_device.h" | 8 | #include "video_core/vulkan_common/vulkan_device.h" |
| 9 | #include "video_core/renderer_vulkan/wrapper.h" | 9 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 10 | 10 | ||
| 11 | namespace Vulkan { | 11 | namespace Vulkan { |
| 12 | 12 | ||
| @@ -17,7 +17,7 @@ struct CommandPool::Pool { | |||
| 17 | vk::CommandBuffers cmdbufs; | 17 | vk::CommandBuffers cmdbufs; |
| 18 | }; | 18 | }; |
| 19 | 19 | ||
| 20 | CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_) | 20 | CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const Device& device_) |
| 21 | : ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), device{device_} {} | 21 | : ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), device{device_} {} |
| 22 | 22 | ||
| 23 | CommandPool::~CommandPool() = default; | 23 | CommandPool::~CommandPool() = default; |
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h index 62a7ce3f1..61c26a22a 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.h +++ b/src/video_core/renderer_vulkan/vk_command_pool.h | |||
| @@ -8,16 +8,16 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "video_core/renderer_vulkan/vk_resource_pool.h" | 10 | #include "video_core/renderer_vulkan/vk_resource_pool.h" |
| 11 | #include "video_core/renderer_vulkan/wrapper.h" | 11 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 12 | 12 | ||
| 13 | namespace Vulkan { | 13 | namespace Vulkan { |
| 14 | 14 | ||
| 15 | class Device; | ||
| 15 | class MasterSemaphore; | 16 | class MasterSemaphore; |
| 16 | class VKDevice; | ||
| 17 | 17 | ||
| 18 | class CommandPool final : public ResourcePool { | 18 | class CommandPool final : public ResourcePool { |
| 19 | public: | 19 | public: |
| 20 | explicit CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_); | 20 | explicit CommandPool(MasterSemaphore& master_semaphore_, const Device& device_); |
| 21 | ~CommandPool() override; | 21 | ~CommandPool() override; |
| 22 | 22 | ||
| 23 | void Allocate(size_t begin, size_t end) override; | 23 | void Allocate(size_t begin, size_t end) override; |
| @@ -27,7 +27,7 @@ public: | |||
| 27 | private: | 27 | private: |
| 28 | struct Pool; | 28 | struct Pool; |
| 29 | 29 | ||
| 30 | const VKDevice& device; | 30 | const Device& device; |
| 31 | std::vector<Pool> pools; | 31 | std::vector<Pool> pools; |
| 32 | }; | 32 | }; |
| 33 | 33 | ||
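CommandPool::Allocate grows the pool in fixed batches of COMMAND_BUFFER_POOL_SIZE. A hedged sketch of what one such batch costs at the Vulkan level, with simplified flags and the error handling elided:

#include <cstdint>
#include <vector>
#include <vulkan/vulkan.h>

std::vector<VkCommandBuffer> AllocateBatch(VkDevice device, std::uint32_t queue_family,
                                           std::uint32_t count, VkCommandPool& out_pool) {
    // One pool per batch; transient + resettable suits short-lived buffers
    // that are recycled once the GPU has consumed them.
    const VkCommandPoolCreateInfo pool_ci{
        .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .pNext = nullptr,
        .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
                 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = queue_family,
    };
    vkCreateCommandPool(device, &pool_ci, nullptr, &out_pool);

    // Carve a whole batch of primary command buffers out of the pool at once.
    std::vector<VkCommandBuffer> cmdbufs(count);
    const VkCommandBufferAllocateInfo alloc_ci{
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .pNext = nullptr,
        .commandPool = out_pool,
        .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = count,
    };
    vkAllocateCommandBuffers(device, &alloc_ci, cmdbufs.data());
    return cmdbufs;
}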
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 1ac7e2a30..02a6d54b7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -10,111 +10,21 @@ | |||
| 10 | #include "common/alignment.h" | 10 | #include "common/alignment.h" |
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/host_shaders/vulkan_quad_array_comp_spv.h" | ||
| 14 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | ||
| 15 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 16 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 17 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 15 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 18 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 17 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 19 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 18 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 20 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 19 | #include "video_core/renderer_vulkan/wrapper.h" | 21 | #include "video_core/vulkan_common/vulkan_device.h" |
| 22 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 20 | 23 | ||
| 21 | namespace Vulkan { | 24 | namespace Vulkan { |
| 22 | 25 | ||
| 23 | namespace { | 26 | namespace { |
| 24 | 27 | ||
| 25 | // Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there. | ||
| 26 | constexpr u8 quad_array[] = { | ||
| 27 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00, | ||
| 28 | 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, | ||
| 29 | 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, | ||
| 30 | 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 31 | 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, | ||
| 32 | 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 33 | 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 34 | 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, | ||
| 35 | 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 36 | 0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 37 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 38 | 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 39 | 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 40 | 0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 41 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 42 | 0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, | ||
| 43 | 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 44 | 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 45 | 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 46 | 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 47 | 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 48 | 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 49 | 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, | ||
| 50 | 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 51 | 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, | ||
| 52 | 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 53 | 0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 54 | 0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, | ||
| 55 | 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, | ||
| 56 | 0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, | ||
| 57 | 0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 58 | 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, | ||
| 59 | 0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 60 | 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 61 | 0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 62 | 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 63 | 0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, | ||
| 64 | 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 65 | 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 66 | 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 67 | 0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, | ||
| 68 | 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, | ||
| 69 | 0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 70 | 0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 71 | 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, | ||
| 72 | 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, | ||
| 73 | 0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, | ||
| 74 | 0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, | ||
| 75 | 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 76 | 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, | ||
| 77 | 0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, | ||
| 78 | 0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00, | ||
| 79 | 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, | ||
| 80 | 0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 81 | 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 82 | 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, | ||
| 83 | 0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, | ||
| 84 | 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, | ||
| 85 | 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, | ||
| 86 | 0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, | ||
| 87 | 0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, | ||
| 88 | 0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 89 | 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, | ||
| 90 | 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, | ||
| 91 | 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, | ||
| 92 | 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00, | ||
| 93 | 0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 94 | 0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, | ||
| 95 | 0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, | ||
| 96 | 0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, | ||
| 97 | 0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, | ||
| 98 | 0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, | ||
| 99 | 0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 100 | 0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 101 | 0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 102 | 0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, | ||
| 103 | 0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, | ||
| 104 | 0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, | ||
| 105 | 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, | ||
| 106 | 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, | ||
| 107 | 0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, | ||
| 108 | 0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00, | ||
| 109 | 0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, | ||
| 110 | 0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 111 | 0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, | ||
| 112 | 0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 113 | 0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00, | ||
| 114 | 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, | ||
| 115 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, | ||
| 116 | }; | ||
| 117 | |||
| 118 | VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { | 28 | VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { |
| 119 | return { | 29 | return { |
| 120 | .binding = 0, | 30 | .binding = 0, |
| @@ -144,208 +54,6 @@ VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | |||
| 144 | }; | 54 | }; |
| 145 | } | 55 | } |
| 146 | 56 | ||
| 147 | // Uint8 SPIR-V module. Generated from the "shaders/" directory. | ||
| 148 | constexpr u8 uint8_pass[] = { | ||
| 149 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00, | ||
| 150 | 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, | ||
| 151 | 0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, | ||
| 152 | 0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, | ||
| 153 | 0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f, | ||
| 154 | 0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, | ||
| 155 | 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, | ||
| 156 | 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, | ||
| 157 | 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, | ||
| 158 | 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 159 | 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, | ||
| 160 | 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 161 | 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, | ||
| 162 | 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, | ||
| 163 | 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, | ||
| 164 | 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, | ||
| 165 | 0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 166 | 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 167 | 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00, | ||
| 168 | 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 169 | 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 170 | 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, | ||
| 171 | 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 172 | 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 173 | 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00, | ||
| 174 | 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 175 | 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, | ||
| 176 | 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 177 | 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, | ||
| 178 | 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 179 | 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, | ||
| 180 | 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, | ||
| 181 | 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 182 | 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, | ||
| 183 | 0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, | ||
| 184 | 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, | ||
| 185 | 0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 186 | 0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 187 | 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 188 | 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, | ||
| 189 | 0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, | ||
| 190 | 0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 191 | 0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 192 | 0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 193 | 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 194 | 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 195 | 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 196 | 0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, | ||
| 197 | 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, | ||
| 198 | 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, | ||
| 199 | 0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, | ||
| 200 | 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 201 | 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 202 | 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 203 | 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 204 | 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, | ||
| 205 | 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 206 | 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 207 | 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, | ||
| 208 | 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, | ||
| 209 | 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, | ||
| 210 | 0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, | ||
| 211 | 0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, | ||
| 212 | 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, | ||
| 213 | 0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, | ||
| 214 | 0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||
| 215 | 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, | ||
| 216 | 0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 217 | 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00, | ||
| 218 | 0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, | ||
| 219 | 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 220 | 0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, | ||
| 221 | 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, | ||
| 222 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, | ||
| 223 | }; | ||
| 224 | |||
| 225 | // Quad indexed SPIR-V module. Generated from the "shaders/" directory. | ||
| 226 | constexpr u8 QUAD_INDEXED_SPV[] = { | ||
| 227 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00, | ||
| 228 | 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, | ||
| 229 | 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, | ||
| 230 | 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 231 | 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, | ||
| 232 | 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 233 | 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 234 | 0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, | ||
| 235 | 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 236 | 0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, | ||
| 237 | 0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 238 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 239 | 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 240 | 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 241 | 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 242 | 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 243 | 0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 244 | 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 245 | 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 246 | 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 247 | 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, | ||
| 248 | 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 249 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, | ||
| 250 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 251 | 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, | ||
| 252 | 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 253 | 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 254 | 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 255 | 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, | ||
| 256 | 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 257 | 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 258 | 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 259 | 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, | ||
| 260 | 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 261 | 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 262 | 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 263 | 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, | ||
| 264 | 0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 265 | 0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 266 | 0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 267 | 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 268 | 0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 269 | 0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 270 | 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, | ||
| 271 | 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 272 | 0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 273 | 0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 274 | 0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00, | ||
| 275 | 0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 276 | 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 277 | 0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00, | ||
| 278 | 0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, | ||
| 279 | 0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 280 | 0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, | ||
| 281 | 0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, | ||
| 282 | 0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 283 | 0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, | ||
| 284 | 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 285 | 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 286 | 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, | ||
| 287 | 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, | ||
| 288 | 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, | ||
| 289 | 0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, | ||
| 290 | 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 291 | 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00, | ||
| 292 | 0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, | ||
| 293 | 0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00, | ||
| 294 | 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, | ||
| 295 | 0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00, | ||
| 296 | 0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 297 | 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, | ||
| 298 | 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, | ||
| 299 | 0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, | ||
| 300 | 0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, | ||
| 301 | 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, | ||
| 302 | 0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, | ||
| 303 | 0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 304 | 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, | ||
| 305 | 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, | ||
| 306 | 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, | ||
| 307 | 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||
| 308 | 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, | ||
| 309 | 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, | ||
| 310 | 0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, | ||
| 311 | 0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 312 | 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, | ||
| 313 | 0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||
| 314 | 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, | ||
| 315 | 0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 316 | 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, | ||
| 317 | 0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, | ||
| 318 | 0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 319 | 0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, | ||
| 320 | 0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 321 | 0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, | ||
| 322 | 0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 323 | 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 324 | 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 325 | 0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, | ||
| 326 | 0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, | ||
| 327 | 0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, | ||
| 328 | 0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 329 | 0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, | ||
| 330 | 0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, | ||
| 331 | 0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00, | ||
| 332 | 0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, | ||
| 333 | 0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, | ||
| 334 | 0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 335 | 0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, | ||
| 336 | 0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, | ||
| 337 | 0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, | ||
| 338 | 0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, | ||
| 339 | 0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00, | ||
| 340 | 0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, | ||
| 341 | 0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 342 | 0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||
| 343 | 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00, | ||
| 344 | 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00, | ||
| 345 | 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, | ||
| 346 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, | ||
| 347 | }; | ||
| 348 | |||
| 349 | std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { | 57 | std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { |
| 350 | return {{ | 58 | return {{ |
| 351 | { | 59 | { |
| @@ -378,11 +86,11 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { | |||
| 378 | 86 | ||
| 379 | } // Anonymous namespace | 87 | } // Anonymous namespace |
| 380 | 88 | ||
| 381 | VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, | 89 | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, |
| 382 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | 90 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 383 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, | 91 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, |
| 384 | vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, | 92 | vk::Span<VkPushConstantRange> push_constants, |
| 385 | const u8* code) { | 93 | std::span<const u32> code) { |
| 386 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ | 94 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ |
| 387 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | 95 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, |
| 388 | .pNext = nullptr, | 96 | .pNext = nullptr, |
| @@ -390,7 +98,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto | |||
| 390 | .bindingCount = bindings.size(), | 98 | .bindingCount = bindings.size(), |
| 391 | .pBindings = bindings.data(), | 99 | .pBindings = bindings.data(), |
| 392 | }); | 100 | }); |
| 393 | |||
| 394 | layout = device.GetLogical().CreatePipelineLayout({ | 101 | layout = device.GetLogical().CreatePipelineLayout({ |
| 395 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | 102 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, |
| 396 | .pNext = nullptr, | 103 | .pNext = nullptr, |
| @@ -400,7 +107,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto | |||
| 400 | .pushConstantRangeCount = push_constants.size(), | 107 | .pushConstantRangeCount = push_constants.size(), |
| 401 | .pPushConstantRanges = push_constants.data(), | 108 | .pPushConstantRanges = push_constants.data(), |
| 402 | }); | 109 | }); |
| 403 | |||
| 404 | if (!templates.empty()) { | 110 | if (!templates.empty()) { |
| 405 | descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ | 111 | descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ |
| 406 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, | 112 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, |
| @@ -417,18 +123,13 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto | |||
| 417 | 123 | ||
| 418 | descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); | 124 | descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); |
| 419 | } | 125 | } |
| 420 | |||
| 421 | auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1); | ||
| 422 | std::memcpy(code_copy.get(), code, code_size); | ||
| 423 | |||
| 424 | module = device.GetLogical().CreateShaderModule({ | 126 | module = device.GetLogical().CreateShaderModule({ |
| 425 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | 127 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| 426 | .pNext = nullptr, | 128 | .pNext = nullptr, |
| 427 | .flags = 0, | 129 | .flags = 0, |
| 428 | .codeSize = code_size, | 130 | .codeSize = static_cast<u32>(code.size_bytes()), |
| 429 | .pCode = code_copy.get(), | 131 | .pCode = code.data(), |
| 430 | }); | 132 | }); |
| 431 | |||
| 432 | pipeline = device.GetLogical().CreateComputePipeline({ | 133 | pipeline = device.GetLogical().CreateComputePipeline({ |
| 433 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | 134 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, |
| 434 | .pNext = nullptr, | 135 | .pNext = nullptr, |
| @@ -461,13 +162,13 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( | |||
| 461 | return set; | 162 | return set; |
| 462 | } | 163 | } |
| 463 | 164 | ||
| 464 | QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, | 165 | QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_, |
| 465 | VKDescriptorPool& descriptor_pool_, | 166 | VKDescriptorPool& descriptor_pool_, |
| 466 | VKStagingBufferPool& staging_buffer_pool_, | 167 | VKStagingBufferPool& staging_buffer_pool_, |
| 467 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 168 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| 468 | : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), | 169 | : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), |
| 469 | BuildQuadArrayPassDescriptorUpdateTemplateEntry(), | 170 | BuildQuadArrayPassDescriptorUpdateTemplateEntry(), |
| 470 | BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), | 171 | BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV), |
| 471 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | 172 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |
| 472 | update_descriptor_queue{update_descriptor_queue_} {} | 173 | update_descriptor_queue{update_descriptor_queue_} {} |
| 473 | 174 | ||
| @@ -510,12 +211,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 | |||
| 510 | return {*buffer.handle, 0}; | 211 | return {*buffer.handle, 0}; |
| 511 | } | 212 | } |
| 512 | 213 | ||
| 513 | Uint8Pass::Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_, | 214 | Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, |
| 514 | VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, | 215 | VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_, |
| 515 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 216 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| 516 | : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), | 217 | : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), |
| 517 | BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), | 218 | BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), |
| 518 | uint8_pass), | ||
| 519 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | 219 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |
| 520 | update_descriptor_queue{update_descriptor_queue_} {} | 220 | update_descriptor_queue{update_descriptor_queue_} {} |
| 521 | 221 | ||
| @@ -555,14 +255,13 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff | |||
| 555 | return {*buffer.handle, 0}; | 255 | return {*buffer.handle, 0}; |
| 556 | } | 256 | } |
| 557 | 257 | ||
| 558 | QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_, | 258 | QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, |
| 559 | VKDescriptorPool& descriptor_pool_, | 259 | VKDescriptorPool& descriptor_pool_, |
| 560 | VKStagingBufferPool& staging_buffer_pool_, | 260 | VKStagingBufferPool& staging_buffer_pool_, |
| 561 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 261 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| 562 | : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), | 262 | : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), |
| 563 | BuildInputOutputDescriptorUpdateTemplate(), | 263 | BuildInputOutputDescriptorUpdateTemplate(), |
| 564 | BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), | 264 | BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), |
| 565 | QUAD_INDEXED_SPV), | ||
| 566 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | 265 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |
| 567 | update_descriptor_queue{update_descriptor_queue_} {} | 266 | update_descriptor_queue{update_descriptor_queue_} {} |
| 568 | 267 | ||
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 2dc87902c..7ddb09afb 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -5,27 +5,27 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <optional> | 7 | #include <optional> |
| 8 | #include <span> | ||
| 8 | #include <utility> | 9 | #include <utility> |
| 9 | 10 | ||
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 11 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 12 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 13 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 13 | #include "video_core/renderer_vulkan/wrapper.h" | 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 14 | 15 | ||
| 15 | namespace Vulkan { | 16 | namespace Vulkan { |
| 16 | 17 | ||
| 17 | class VKDevice; | 18 | class Device; |
| 18 | class VKScheduler; | 19 | class VKScheduler; |
| 19 | class VKStagingBufferPool; | 20 | class VKStagingBufferPool; |
| 20 | class VKUpdateDescriptorQueue; | 21 | class VKUpdateDescriptorQueue; |
| 21 | 22 | ||
| 22 | class VKComputePass { | 23 | class VKComputePass { |
| 23 | public: | 24 | public: |
| 24 | explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, | 25 | explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, |
| 25 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | 26 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 26 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, | 27 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, |
| 27 | vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, | 28 | vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); |
| 28 | const u8* code); | ||
| 29 | ~VKComputePass(); | 29 | ~VKComputePass(); |
| 30 | 30 | ||
| 31 | protected: | 31 | protected: |
| @@ -43,7 +43,7 @@ private: | |||
| 43 | 43 | ||
| 44 | class QuadArrayPass final : public VKComputePass { | 44 | class QuadArrayPass final : public VKComputePass { |
| 45 | public: | 45 | public: |
| 46 | explicit QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, | 46 | explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_, |
| 47 | VKDescriptorPool& descriptor_pool_, | 47 | VKDescriptorPool& descriptor_pool_, |
| 48 | VKStagingBufferPool& staging_buffer_pool_, | 48 | VKStagingBufferPool& staging_buffer_pool_, |
| 49 | VKUpdateDescriptorQueue& update_descriptor_queue_); | 49 | VKUpdateDescriptorQueue& update_descriptor_queue_); |
| @@ -59,7 +59,7 @@ private: | |||
| 59 | 59 | ||
| 60 | class Uint8Pass final : public VKComputePass { | 60 | class Uint8Pass final : public VKComputePass { |
| 61 | public: | 61 | public: |
| 62 | explicit Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_, | 62 | explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, |
| 63 | VKDescriptorPool& descriptor_pool_, | 63 | VKDescriptorPool& descriptor_pool_, |
| 64 | VKStagingBufferPool& staging_buffer_pool_, | 64 | VKStagingBufferPool& staging_buffer_pool_, |
| 65 | VKUpdateDescriptorQueue& update_descriptor_queue_); | 65 | VKUpdateDescriptorQueue& update_descriptor_queue_); |
| @@ -75,7 +75,7 @@ private: | |||
| 75 | 75 | ||
| 76 | class QuadIndexedPass final : public VKComputePass { | 76 | class QuadIndexedPass final : public VKComputePass { |
| 77 | public: | 77 | public: |
| 78 | explicit QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_, | 78 | explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, |
| 79 | VKDescriptorPool& descriptor_pool_, | 79 | VKDescriptorPool& descriptor_pool_, |
| 80 | VKStagingBufferPool& staging_buffer_pool_, | 80 | VKStagingBufferPool& staging_buffer_pool_, |
| 81 | VKUpdateDescriptorQueue& update_descriptor_queue_); | 81 | VKUpdateDescriptorQueue& update_descriptor_queue_); |
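The vk_compute_pass hunks above swap the embedded shader byte arrays for host-generated VULKAN_*_COMP_SPV word arrays and collapse the (code_size, const u8*) constructor pair into a single std::span<const u32>. Below is a minimal sketch of that pointer-plus-size to span refactor; UploadCode and SPV_WORDS are hypothetical names, not identifiers from the tree.

    #include <cstdint>
    #include <cstdio>
    #include <span>

    namespace {
    // Stand-in for a generated VULKAN_*_COMP_SPV array (not real SPIR-V words).
    constexpr std::uint32_t SPV_WORDS[]{0x07230203u, 0x00010000u, 0x0008000au};

    // Before: void UploadCode(std::size_t code_size, const u8* code);
    // After: one span parameter carries pointer and element count together.
    void UploadCode(std::span<const std::uint32_t> code) {
        std::printf("uploading %zu words\n", code.size());
    }
    } // namespace

    int main() {
        UploadCode(SPV_WORDS); // a C array converts to std::span implicitly
    }

The span form also sidesteps byte-versus-word size bookkeeping at call sites, since the element count is carried by the span itself.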
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 62f44d6da..3a48219b7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -6,16 +6,16 @@ | |||
| 6 | 6 | ||
| 7 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 7 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 9 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 9 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 11 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 10 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 12 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 11 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 13 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 12 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 14 | #include "video_core/renderer_vulkan/wrapper.h" | 13 | #include "video_core/vulkan_common/vulkan_device.h" |
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 15 | 15 | ||
| 16 | namespace Vulkan { | 16 | namespace Vulkan { |
| 17 | 17 | ||
| 18 | VKComputePipeline::VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_, | 18 | VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, |
| 19 | VKDescriptorPool& descriptor_pool_, | 19 | VKDescriptorPool& descriptor_pool_, |
| 20 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 20 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 21 | const SPIRVShader& shader_) | 21 | const SPIRVShader& shader_) |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 49e2113a2..7e16575ac 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h | |||
| @@ -7,17 +7,17 @@ | |||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 9 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 9 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 10 | #include "video_core/renderer_vulkan/wrapper.h" | 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 11 | 11 | ||
| 12 | namespace Vulkan { | 12 | namespace Vulkan { |
| 13 | 13 | ||
| 14 | class VKDevice; | 14 | class Device; |
| 15 | class VKScheduler; | 15 | class VKScheduler; |
| 16 | class VKUpdateDescriptorQueue; | 16 | class VKUpdateDescriptorQueue; |
| 17 | 17 | ||
| 18 | class VKComputePipeline final { | 18 | class VKComputePipeline final { |
| 19 | public: | 19 | public: |
| 20 | explicit VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_, | 20 | explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, |
| 21 | VKDescriptorPool& descriptor_pool_, | 21 | VKDescriptorPool& descriptor_pool_, |
| 22 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 22 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 23 | const SPIRVShader& shader_); | 23 | const SPIRVShader& shader_); |
| @@ -48,7 +48,7 @@ private: | |||
| 48 | 48 | ||
| 49 | vk::Pipeline CreatePipeline() const; | 49 | vk::Pipeline CreatePipeline() const; |
| 50 | 50 | ||
| 51 | const VKDevice& device; | 51 | const Device& device; |
| 52 | VKScheduler& scheduler; | 52 | VKScheduler& scheduler; |
| 53 | ShaderEntries entries; | 53 | ShaderEntries entries; |
| 54 | 54 | ||
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index f38e089d5..ef9fb5910 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | |||
| @@ -6,10 +6,10 @@ | |||
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 9 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_resource_pool.h" | 9 | #include "video_core/renderer_vulkan/vk_resource_pool.h" |
| 11 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 10 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 12 | #include "video_core/renderer_vulkan/wrapper.h" | 11 | #include "video_core/vulkan_common/vulkan_device.h" |
| 12 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 13 | 13 | ||
| 14 | namespace Vulkan { | 14 | namespace Vulkan { |
| 15 | 15 | ||
| @@ -32,7 +32,7 @@ void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { | |||
| 32 | descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); | 32 | descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | VKDescriptorPool::VKDescriptorPool(const VKDevice& device_, VKScheduler& scheduler) | 35 | VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) |
| 36 | : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ | 36 | : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ |
| 37 | AllocateNewPool()} {} | 37 | AllocateNewPool()} {} |
| 38 | 38 | ||
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index 544f32a20..f892be7be 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h | |||
| @@ -7,11 +7,11 @@ | |||
| 7 | #include <vector> | 7 | #include <vector> |
| 8 | 8 | ||
| 9 | #include "video_core/renderer_vulkan/vk_resource_pool.h" | 9 | #include "video_core/renderer_vulkan/vk_resource_pool.h" |
| 10 | #include "video_core/renderer_vulkan/wrapper.h" | 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 11 | 11 | ||
| 12 | namespace Vulkan { | 12 | namespace Vulkan { |
| 13 | 13 | ||
| 14 | class VKDevice; | 14 | class Device; |
| 15 | class VKDescriptorPool; | 15 | class VKDescriptorPool; |
| 16 | class VKScheduler; | 16 | class VKScheduler; |
| 17 | 17 | ||
| @@ -39,7 +39,7 @@ class VKDescriptorPool final { | |||
| 39 | friend DescriptorAllocator; | 39 | friend DescriptorAllocator; |
| 40 | 40 | ||
| 41 | public: | 41 | public: |
| 42 | explicit VKDescriptorPool(const VKDevice& device, VKScheduler& scheduler); | 42 | explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler); |
| 43 | ~VKDescriptorPool(); | 43 | ~VKDescriptorPool(); |
| 44 | 44 | ||
| 45 | VKDescriptorPool(const VKDescriptorPool&) = delete; | 45 | VKDescriptorPool(const VKDescriptorPool&) = delete; |
| @@ -50,7 +50,7 @@ private: | |||
| 50 | 50 | ||
| 51 | vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); | 51 | vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); |
| 52 | 52 | ||
| 53 | const VKDevice& device; | 53 | const Device& device; |
| 54 | MasterSemaphore& master_semaphore; | 54 | MasterSemaphore& master_semaphore; |
| 55 | 55 | ||
| 56 | std::vector<vk::DescriptorPool> pools; | 56 | std::vector<vk::DescriptorPool> pools; |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 0bcaee714..4c5bc0aa1 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp | |||
| @@ -6,19 +6,19 @@ | |||
| 6 | #include <thread> | 6 | #include <thread> |
| 7 | 7 | ||
| 8 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 8 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 9 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_fence_manager.h" | 9 | #include "video_core/renderer_vulkan/vk_fence_manager.h" |
| 11 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 10 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 12 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 11 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 13 | #include "video_core/renderer_vulkan/wrapper.h" | 12 | #include "video_core/vulkan_common/vulkan_device.h" |
| 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 14 | 14 | ||
| 15 | namespace Vulkan { | 15 | namespace Vulkan { |
| 16 | 16 | ||
| 17 | InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_, | 17 | InnerFence::InnerFence(const Device& device_, VKScheduler& scheduler_, u32 payload_, |
| 18 | bool is_stubbed_) | 18 | bool is_stubbed_) |
| 19 | : FenceBase{payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {} | 19 | : FenceBase{payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {} |
| 20 | 20 | ||
| 21 | InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_, | 21 | InnerFence::InnerFence(const Device& device_, VKScheduler& scheduler_, GPUVAddr address_, |
| 22 | u32 payload_, bool is_stubbed_) | 22 | u32 payload_, bool is_stubbed_) |
| 23 | : FenceBase{address_, payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {} | 23 | : FenceBase{address_, payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {} |
| 24 | 24 | ||
| @@ -73,10 +73,9 @@ bool InnerFence::IsEventSignalled() const { | |||
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 75 | VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, |
| 76 | Tegra::MemoryManager& memory_manager_, | 76 | Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, |
| 77 | VKTextureCache& texture_cache_, VKBufferCache& buffer_cache_, | 77 | VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, |
| 78 | VKQueryCache& query_cache_, const VKDevice& device_, | 78 | const Device& device_, VKScheduler& scheduler_) |
| 79 | VKScheduler& scheduler_) | ||
| 80 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, | 79 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, |
| 81 | device{device_}, scheduler{scheduler_} {} | 80 | device{device_}, scheduler{scheduler_} {} |
| 82 | 81 | ||
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index c8547cc24..6b51e4587 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -8,7 +8,8 @@ | |||
| 8 | 8 | ||
| 9 | #include "video_core/fence_manager.h" | 9 | #include "video_core/fence_manager.h" |
| 10 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 10 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 11 | #include "video_core/renderer_vulkan/wrapper.h" | 11 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 12 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 12 | 13 | ||
| 13 | namespace Core { | 14 | namespace Core { |
| 14 | class System; | 15 | class System; |
| @@ -20,17 +21,16 @@ class RasterizerInterface; | |||
| 20 | 21 | ||
| 21 | namespace Vulkan { | 22 | namespace Vulkan { |
| 22 | 23 | ||
| 24 | class Device; | ||
| 23 | class VKBufferCache; | 25 | class VKBufferCache; |
| 24 | class VKDevice; | ||
| 25 | class VKQueryCache; | 26 | class VKQueryCache; |
| 26 | class VKScheduler; | 27 | class VKScheduler; |
| 27 | class VKTextureCache; | ||
| 28 | 28 | ||
| 29 | class InnerFence : public VideoCommon::FenceBase { | 29 | class InnerFence : public VideoCommon::FenceBase { |
| 30 | public: | 30 | public: |
| 31 | explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_, | 31 | explicit InnerFence(const Device& device_, VKScheduler& scheduler_, u32 payload_, |
| 32 | bool is_stubbed_); | 32 | bool is_stubbed_); |
| 33 | explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_, | 33 | explicit InnerFence(const Device& device_, VKScheduler& scheduler_, GPUVAddr address_, |
| 34 | u32 payload_, bool is_stubbed_); | 34 | u32 payload_, bool is_stubbed_); |
| 35 | ~InnerFence(); | 35 | ~InnerFence(); |
| 36 | 36 | ||
| @@ -43,7 +43,7 @@ public: | |||
| 43 | private: | 43 | private: |
| 44 | bool IsEventSignalled() const; | 44 | bool IsEventSignalled() const; |
| 45 | 45 | ||
| 46 | const VKDevice& device; | 46 | const Device& device; |
| 47 | VKScheduler& scheduler; | 47 | VKScheduler& scheduler; |
| 48 | vk::Event event; | 48 | vk::Event event; |
| 49 | u64 ticks = 0; | 49 | u64 ticks = 0; |
| @@ -51,14 +51,14 @@ private: | |||
| 51 | using Fence = std::shared_ptr<InnerFence>; | 51 | using Fence = std::shared_ptr<InnerFence>; |
| 52 | 52 | ||
| 53 | using GenericFenceManager = | 53 | using GenericFenceManager = |
| 54 | VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>; | 54 | VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>; |
| 55 | 55 | ||
| 56 | class VKFenceManager final : public GenericFenceManager { | 56 | class VKFenceManager final : public GenericFenceManager { |
| 57 | public: | 57 | public: |
| 58 | explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 58 | explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, |
| 59 | Tegra::MemoryManager& memory_manager_, VKTextureCache& texture_cache_, | 59 | Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, |
| 60 | VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, | 60 | VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, |
| 61 | const VKDevice& device_, VKScheduler& scheduler_); | 61 | const Device& device_, VKScheduler& scheduler_); |
| 62 | 62 | ||
| 63 | protected: | 63 | protected: |
| 64 | Fence CreateFence(u32 value, bool is_stubbed) override; | 64 | Fence CreateFence(u32 value, bool is_stubbed) override; |
| @@ -68,7 +68,7 @@ protected: | |||
| 68 | void WaitFence(Fence& fence) override; | 68 | void WaitFence(Fence& fence) override; |
| 69 | 69 | ||
| 70 | private: | 70 | private: |
| 71 | const VKDevice& device; | 71 | const Device& device; |
| 72 | VKScheduler& scheduler; | 72 | VKScheduler& scheduler; |
| 73 | }; | 73 | }; |
| 74 | 74 | ||
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 970979fa1..a5214d0bc 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -12,13 +12,12 @@ | |||
| 12 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 12 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 13 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 13 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 15 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 15 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 17 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 16 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 18 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 19 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 20 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 18 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 21 | #include "video_core/renderer_vulkan/wrapper.h" | 19 | #include "video_core/vulkan_common/vulkan_device.h" |
| 20 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 22 | 21 | ||
| 23 | namespace Vulkan { | 22 | namespace Vulkan { |
| 24 | 23 | ||
| @@ -69,23 +68,45 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { | |||
| 69 | }; | 68 | }; |
| 70 | } | 69 | } |
| 71 | 70 | ||
| 71 | VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { | ||
| 72 | switch (msaa_mode) { | ||
| 73 | case Tegra::Texture::MsaaMode::Msaa1x1: | ||
| 74 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 75 | case Tegra::Texture::MsaaMode::Msaa2x1: | ||
| 76 | case Tegra::Texture::MsaaMode::Msaa2x1_D3D: | ||
| 77 | return VK_SAMPLE_COUNT_2_BIT; | ||
| 78 | case Tegra::Texture::MsaaMode::Msaa2x2: | ||
| 79 | case Tegra::Texture::MsaaMode::Msaa2x2_VC4: | ||
| 80 | case Tegra::Texture::MsaaMode::Msaa2x2_VC12: | ||
| 81 | return VK_SAMPLE_COUNT_4_BIT; | ||
| 82 | case Tegra::Texture::MsaaMode::Msaa4x2: | ||
| 83 | case Tegra::Texture::MsaaMode::Msaa4x2_D3D: | ||
| 84 | case Tegra::Texture::MsaaMode::Msaa4x2_VC8: | ||
| 85 | case Tegra::Texture::MsaaMode::Msaa4x2_VC24: | ||
| 86 | return VK_SAMPLE_COUNT_8_BIT; | ||
| 87 | case Tegra::Texture::MsaaMode::Msaa4x4: | ||
| 88 | return VK_SAMPLE_COUNT_16_BIT; | ||
| 89 | default: | ||
| 90 | UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); | ||
| 91 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 72 | } // Anonymous namespace | 95 | } // Anonymous namespace |
| 73 | 96 | ||
| 74 | VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, | 97 | VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, |
| 75 | VKDescriptorPool& descriptor_pool_, | 98 | VKDescriptorPool& descriptor_pool_, |
| 76 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 99 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 77 | VKRenderPassCache& renderpass_cache_, | 100 | const GraphicsPipelineCacheKey& key, |
| 78 | const GraphicsPipelineCacheKey& key_, | 101 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 79 | vk::Span<VkDescriptorSetLayoutBinding> bindings_, | 102 | const SPIRVProgram& program, u32 num_color_buffers) |
| 80 | const SPIRVProgram& program_) | 103 | : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, |
| 81 | : device{device_}, scheduler{scheduler_}, cache_key{key_}, hash{cache_key.Hash()}, | 104 | descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, |
| 82 | descriptor_set_layout{CreateDescriptorSetLayout(bindings_)}, | ||
| 83 | descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, | 105 | descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, |
| 84 | update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, | 106 | update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, |
| 85 | descriptor_template{CreateDescriptorUpdateTemplate(program_)}, modules{CreateShaderModules( | 107 | descriptor_template{CreateDescriptorUpdateTemplate(program)}, |
| 86 | program_)}, | 108 | modules(CreateShaderModules(program)), |
| 87 | renderpass{renderpass_cache_.GetRenderPass(cache_key.renderpass_params)}, | 109 | pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} |
| 88 | pipeline{CreatePipeline(cache_key.renderpass_params, program_)} {} | ||
| 89 | 110 | ||
| 90 | VKGraphicsPipeline::~VKGraphicsPipeline() = default; | 111 | VKGraphicsPipeline::~VKGraphicsPipeline() = default; |
| 91 | 112 | ||
| @@ -179,8 +200,9 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( | |||
| 179 | return shader_modules; | 200 | return shader_modules; |
| 180 | } | 201 | } |
| 181 | 202 | ||
| 182 | vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, | 203 | vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, |
| 183 | const SPIRVProgram& program) const { | 204 | VkRenderPass renderpass, |
| 205 | u32 num_color_buffers) const { | ||
| 184 | const auto& state = cache_key.fixed_state; | 206 | const auto& state = cache_key.fixed_state; |
| 185 | const auto& viewport_swizzles = state.viewport_swizzles; | 207 | const auto& viewport_swizzles = state.viewport_swizzles; |
| 186 | 208 | ||
| @@ -190,11 +212,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 190 | // state is ignored | 212 | // state is ignored |
| 191 | dynamic.raw1 = 0; | 213 | dynamic.raw1 = 0; |
| 192 | dynamic.raw2 = 0; | 214 | dynamic.raw2 = 0; |
| 193 | for (FixedPipelineState::VertexBinding& binding : dynamic.vertex_bindings) { | 215 | dynamic.vertex_strides.fill(0); |
| 194 | // Enable all vertex bindings | ||
| 195 | binding.raw = 0; | ||
| 196 | binding.enabled.Assign(1); | ||
| 197 | } | ||
| 198 | } else { | 216 | } else { |
| 199 | dynamic = state.dynamic_state; | 217 | dynamic = state.dynamic_state; |
| 200 | } | 218 | } |
| @@ -202,19 +220,16 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 202 | std::vector<VkVertexInputBindingDescription> vertex_bindings; | 220 | std::vector<VkVertexInputBindingDescription> vertex_bindings; |
| 203 | std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; | 221 | std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; |
| 204 | for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | 222 | for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 205 | const auto& binding = dynamic.vertex_bindings[index]; | 223 | if (state.attributes[index].binding_index_enabled == 0) { |
| 206 | if (!binding.enabled) { | ||
| 207 | continue; | 224 | continue; |
| 208 | } | 225 | } |
| 209 | const bool instanced = state.binding_divisors[index] != 0; | 226 | const bool instanced = state.binding_divisors[index] != 0; |
| 210 | const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; | 227 | const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; |
| 211 | |||
| 212 | vertex_bindings.push_back({ | 228 | vertex_bindings.push_back({ |
| 213 | .binding = static_cast<u32>(index), | 229 | .binding = static_cast<u32>(index), |
| 214 | .stride = binding.stride, | 230 | .stride = dynamic.vertex_strides[index], |
| 215 | .inputRate = rate, | 231 | .inputRate = rate, |
| 216 | }); | 232 | }); |
| 217 | |||
| 218 | if (instanced) { | 233 | if (instanced) { |
| 219 | vertex_binding_divisors.push_back({ | 234 | vertex_binding_divisors.push_back({ |
| 220 | .binding = static_cast<u32>(index), | 235 | .binding = static_cast<u32>(index), |
| @@ -290,8 +305,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 290 | }; | 305 | }; |
| 291 | 306 | ||
| 292 | std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; | 307 | std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; |
| 293 | std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(), | 308 | std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); |
| 294 | UnpackViewportSwizzle); | ||
| 295 | VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ | 309 | VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ |
| 296 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, | 310 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, |
| 297 | .pNext = nullptr, | 311 | .pNext = nullptr, |
| @@ -326,7 +340,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 326 | .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | 340 | .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, |
| 327 | .pNext = nullptr, | 341 | .pNext = nullptr, |
| 328 | .flags = 0, | 342 | .flags = 0, |
| 329 | .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, | 343 | .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), |
| 330 | .sampleShadingEnable = VK_FALSE, | 344 | .sampleShadingEnable = VK_FALSE, |
| 331 | .minSampleShading = 0.0f, | 345 | .minSampleShading = 0.0f, |
| 332 | .pSampleMask = nullptr, | 346 | .pSampleMask = nullptr, |
| @@ -352,8 +366,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 352 | }; | 366 | }; |
| 353 | 367 | ||
| 354 | std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; | 368 | std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; |
| 355 | const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments); | 369 | for (std::size_t index = 0; index < num_color_buffers; ++index) { |
| 356 | for (std::size_t index = 0; index < num_attachments; ++index) { | ||
| 357 | static constexpr std::array COMPONENT_TABLE{ | 370 | static constexpr std::array COMPONENT_TABLE{ |
| 358 | VK_COLOR_COMPONENT_R_BIT, | 371 | VK_COLOR_COMPONENT_R_BIT, |
| 359 | VK_COLOR_COMPONENT_G_BIT, | 372 | VK_COLOR_COMPONENT_G_BIT, |
| @@ -387,7 +400,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 387 | .flags = 0, | 400 | .flags = 0, |
| 388 | .logicOpEnable = VK_FALSE, | 401 | .logicOpEnable = VK_FALSE, |
| 389 | .logicOp = VK_LOGIC_OP_COPY, | 402 | .logicOp = VK_LOGIC_OP_COPY, |
| 390 | .attachmentCount = static_cast<u32>(num_attachments), | 403 | .attachmentCount = num_color_buffers, |
| 391 | .pAttachments = cb_attachments.data(), | 404 | .pAttachments = cb_attachments.data(), |
| 392 | .blendConstants = {}, | 405 | .blendConstants = {}, |
| 393 | }; | 406 | }; |
| @@ -447,8 +460,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 447 | stage_ci.pNext = &subgroup_size_ci; | 460 | stage_ci.pNext = &subgroup_size_ci; |
| 448 | } | 461 | } |
| 449 | } | 462 | } |
| 450 | 463 | return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ | |
| 451 | const VkGraphicsPipelineCreateInfo ci{ | ||
| 452 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | 464 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, |
| 453 | .pNext = nullptr, | 465 | .pNext = nullptr, |
| 454 | .flags = 0, | 466 | .flags = 0, |
| @@ -468,8 +480,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 468 | .subpass = 0, | 480 | .subpass = 0, |
| 469 | .basePipelineHandle = nullptr, | 481 | .basePipelineHandle = nullptr, |
| 470 | .basePipelineIndex = 0, | 482 | .basePipelineIndex = 0, |
| 471 | }; | 483 | }); |
| 472 | return device.GetLogical().CreateGraphicsPipeline(ci); | ||
| 473 | } | 484 | } |
| 474 | 485 | ||
| 475 | } // namespace Vulkan | 486 | } // namespace Vulkan |
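The new ConvertMsaaMode above maps each Tegra WxH MSAA mode (including the _D3D and variance-coverage _VC variants) to the Vulkan sample count holding W*H samples, so Msaa2x2 becomes VK_SAMPLE_COUNT_4_BIT and Msaa4x4 becomes VK_SAMPLE_COUNT_16_BIT. A small sketch of that arithmetic; CountFromDims is a hypothetical helper, not code from the tree.

    // Illustrative only: pick the power-of-two sample count for W*H samples.
    enum SampleCount { S1 = 1, S2 = 2, S4 = 4, S8 = 8, S16 = 16 };

    constexpr SampleCount CountFromDims(int width, int height) {
        const int samples = width * height;
        return samples >= 16 ? S16
             : samples >= 8  ? S8
             : samples >= 4  ? S4
             : samples >= 2  ? S2
                             : S1;
    }

    // Matches the switch above: 2x1 -> 2 samples, 2x2 -> 4, 4x2 -> 8, 4x4 -> 16.
    static_assert(CountFromDims(2, 1) == S2);
    static_assert(CountFromDims(2, 2) == S4);
    static_assert(CountFromDims(4, 2) == S8);
    static_assert(CountFromDims(4, 4) == S16);

    int main() {}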
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 3fb31d55a..8b6a98fe0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h | |||
| @@ -8,20 +8,19 @@ | |||
| 8 | #include <optional> | 8 | #include <optional> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 12 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 13 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 13 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 14 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 15 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 16 | #include "video_core/renderer_vulkan/wrapper.h" | 16 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 17 | 17 | ||
| 18 | namespace Vulkan { | 18 | namespace Vulkan { |
| 19 | 19 | ||
| 20 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 20 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 21 | 21 | ||
| 22 | struct GraphicsPipelineCacheKey { | 22 | struct GraphicsPipelineCacheKey { |
| 23 | RenderPassParams renderpass_params; | 23 | VkRenderPass renderpass; |
| 24 | u32 padding; | ||
| 25 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; | 24 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; |
| 26 | FixedPipelineState fixed_state; | 25 | FixedPipelineState fixed_state; |
| 27 | 26 | ||
| @@ -34,16 +33,15 @@ struct GraphicsPipelineCacheKey { | |||
| 34 | } | 33 | } |
| 35 | 34 | ||
| 36 | std::size_t Size() const noexcept { | 35 | std::size_t Size() const noexcept { |
| 37 | return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); | 36 | return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); |
| 38 | } | 37 | } |
| 39 | }; | 38 | }; |
| 40 | static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); | 39 | static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); |
| 41 | static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); | 40 | static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); |
| 42 | static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); | 41 | static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); |
| 43 | 42 | ||
| 43 | class Device; | ||
| 44 | class VKDescriptorPool; | 44 | class VKDescriptorPool; |
| 45 | class VKDevice; | ||
| 46 | class VKRenderPassCache; | ||
| 47 | class VKScheduler; | 45 | class VKScheduler; |
| 48 | class VKUpdateDescriptorQueue; | 46 | class VKUpdateDescriptorQueue; |
| 49 | 47 | ||
| @@ -51,13 +49,12 @@ using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderSt | |||
| 51 | 49 | ||
| 52 | class VKGraphicsPipeline final { | 50 | class VKGraphicsPipeline final { |
| 53 | public: | 51 | public: |
| 54 | explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, | 52 | explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, |
| 55 | VKDescriptorPool& descriptor_pool_, | 53 | VKDescriptorPool& descriptor_pool, |
| 56 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 54 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 57 | VKRenderPassCache& renderpass_cache_, | 55 | const GraphicsPipelineCacheKey& key, |
| 58 | const GraphicsPipelineCacheKey& key_, | 56 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 59 | vk::Span<VkDescriptorSetLayoutBinding> bindings_, | 57 | const SPIRVProgram& program, u32 num_color_buffers); |
| 60 | const SPIRVProgram& program_); | ||
| 61 | ~VKGraphicsPipeline(); | 58 | ~VKGraphicsPipeline(); |
| 62 | 59 | ||
| 63 | VkDescriptorSet CommitDescriptorSet(); | 60 | VkDescriptorSet CommitDescriptorSet(); |
| @@ -70,10 +67,6 @@ public: | |||
| 70 | return *layout; | 67 | return *layout; |
| 71 | } | 68 | } |
| 72 | 69 | ||
| 73 | VkRenderPass GetRenderPass() const { | ||
| 74 | return renderpass; | ||
| 75 | } | ||
| 76 | |||
| 77 | GraphicsPipelineCacheKey GetCacheKey() const { | 70 | GraphicsPipelineCacheKey GetCacheKey() const { |
| 78 | return cache_key; | 71 | return cache_key; |
| 79 | } | 72 | } |
| @@ -89,10 +82,10 @@ private: | |||
| 89 | 82 | ||
| 90 | std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; | 83 | std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; |
| 91 | 84 | ||
| 92 | vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params, | 85 | vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, |
| 93 | const SPIRVProgram& program) const; | 86 | u32 num_color_buffers) const; |
| 94 | 87 | ||
| 95 | const VKDevice& device; | 88 | const Device& device; |
| 96 | VKScheduler& scheduler; | 89 | VKScheduler& scheduler; |
| 97 | const GraphicsPipelineCacheKey cache_key; | 90 | const GraphicsPipelineCacheKey cache_key; |
| 98 | const u64 hash; | 91 | const u64 hash; |
| @@ -104,7 +97,6 @@ private: | |||
| 104 | vk::DescriptorUpdateTemplateKHR descriptor_template; | 97 | vk::DescriptorUpdateTemplateKHR descriptor_template; |
| 105 | std::vector<vk::ShaderModule> modules; | 98 | std::vector<vk::ShaderModule> modules; |
| 106 | 99 | ||
| 107 | VkRenderPass renderpass; | ||
| 108 | vk::Pipeline pipeline; | 100 | vk::Pipeline pipeline; |
| 109 | }; | 101 | }; |
| 110 | 102 | ||
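In the header diff above, GraphicsPipelineCacheKey now stores a raw VkRenderPass where RenderPassParams plus an explicit u32 padding word used to sit, and the three static_asserts are kept: the cache hashes and compares keys by their object representation (note the common/cityhash.h include added to vk_pipeline_cache.cpp below), which is only sound for trivially copyable types with no padding bits. A standalone sketch of that idea; Key is illustrative and FNV-1a merely stands in for the real hash.

    #include <cstdint>
    #include <cstring>
    #include <type_traits>

    struct Key {
        std::uint64_t renderpass; // handle-sized member, so no padding follows
        std::uint32_t shader_lo;
        std::uint32_t shader_hi;
    };
    // Byte-wise hashing is only sound when every bit of the object is meaningful:
    static_assert(std::has_unique_object_representations_v<Key>);
    static_assert(std::is_trivially_copyable_v<Key>);

    std::uint64_t HashBytes(const Key& key) {
        // FNV-1a over the raw bytes of the key.
        std::uint64_t hash = 0xcbf29ce484222325ull;
        unsigned char bytes[sizeof(Key)];
        std::memcpy(bytes, &key, sizeof(key));
        for (const unsigned char b : bytes) {
            hash = (hash ^ b) * 0x100000001b3ull;
        }
        return hash;
    }

    int main() {
        const Key key{.renderpass = 0x1234, .shader_lo = 7, .shader_hi = 0};
        return HashBytes(key) != 0 ? 0 : 1;
    }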
diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp deleted file mode 100644 index 072d14e3b..000000000 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ /dev/null | |||
| @@ -1,135 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <memory> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_image.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 12 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 13 | |||
| 14 | namespace Vulkan { | ||
| 15 | |||
| 16 | VKImage::VKImage(const VKDevice& device_, VKScheduler& scheduler_, | ||
| 17 | const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_) | ||
| 18 | : device{device_}, scheduler{scheduler_}, format{image_ci_.format}, aspect_mask{aspect_mask_}, | ||
| 19 | image_num_layers{image_ci_.arrayLayers}, image_num_levels{image_ci_.mipLevels} { | ||
| 20 | UNIMPLEMENTED_IF_MSG(image_ci_.queueFamilyIndexCount != 0, | ||
| 21 | "Queue family tracking is not implemented"); | ||
| 22 | |||
| 23 | image = device_.GetLogical().CreateImage(image_ci_); | ||
| 24 | |||
| 25 | const u32 num_ranges = image_num_layers * image_num_levels; | ||
| 26 | barriers.resize(num_ranges); | ||
| 27 | subrange_states.resize(num_ranges, {{}, image_ci_.initialLayout}); | ||
| 28 | } | ||
| 29 | |||
| 30 | VKImage::~VKImage() = default; | ||
| 31 | |||
| 32 | void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | ||
| 33 | VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, | ||
| 34 | VkImageLayout new_layout) { | ||
| 35 | if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) { | ||
| 36 | return; | ||
| 37 | } | ||
| 38 | |||
| 39 | std::size_t cursor = 0; | ||
| 40 | for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) { | ||
| 41 | for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) { | ||
| 42 | const u32 layer = base_layer + layer_it; | ||
| 43 | const u32 level = base_level + level_it; | ||
| 44 | auto& state = GetSubrangeState(layer, level); | ||
| 45 | auto& barrier = barriers[cursor]; | ||
| 46 | barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; | ||
| 47 | barrier.pNext = nullptr; | ||
| 48 | barrier.srcAccessMask = state.access; | ||
| 49 | barrier.dstAccessMask = new_access; | ||
| 50 | barrier.oldLayout = state.layout; | ||
| 51 | barrier.newLayout = new_layout; | ||
| 52 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 53 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 54 | barrier.image = *image; | ||
| 55 | barrier.subresourceRange.aspectMask = aspect_mask; | ||
| 56 | barrier.subresourceRange.baseMipLevel = level; | ||
| 57 | barrier.subresourceRange.levelCount = 1; | ||
| 58 | barrier.subresourceRange.baseArrayLayer = layer; | ||
| 59 | barrier.subresourceRange.layerCount = 1; | ||
| 60 | state.access = new_access; | ||
| 61 | state.layout = new_layout; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 66 | |||
| 67 | scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) { | ||
| 68 | // TODO(Rodrigo): Implement a way to use the latest stage across subresources. | ||
| 69 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 70 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {}, | ||
| 71 | vk::Span(barriers.data(), cursor)); | ||
| 72 | }); | ||
| 73 | } | ||
| 74 | |||
| 75 | bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | ||
| 76 | VkAccessFlags new_access, VkImageLayout new_layout) noexcept { | ||
| 77 | const bool is_full_range = base_layer == 0 && num_layers == image_num_layers && | ||
| 78 | base_level == 0 && num_levels == image_num_levels; | ||
| 79 | if (!is_full_range) { | ||
| 80 | state_diverged = true; | ||
| 81 | } | ||
| 82 | |||
| 83 | if (!state_diverged) { | ||
| 84 | auto& state = GetSubrangeState(0, 0); | ||
| 85 | if (state.access != new_access || state.layout != new_layout) { | ||
| 86 | return true; | ||
| 87 | } | ||
| 88 | } | ||
| 89 | |||
| 90 | for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) { | ||
| 91 | for (u32 level_it = 0; level_it < num_levels; ++level_it) { | ||
| 92 | const u32 layer = base_layer + layer_it; | ||
| 93 | const u32 level = base_level + level_it; | ||
| 94 | auto& state = GetSubrangeState(layer, level); | ||
| 95 | if (state.access != new_access || state.layout != new_layout) { | ||
| 96 | return true; | ||
| 97 | } | ||
| 98 | } | ||
| 99 | } | ||
| 100 | return false; | ||
| 101 | } | ||
| 102 | |||
| 103 | void VKImage::CreatePresentView() { | ||
| 104 | // Image type has to be 2D to be presented. | ||
| 105 | present_view = device.GetLogical().CreateImageView({ | ||
| 106 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | ||
| 107 | .pNext = nullptr, | ||
| 108 | .flags = 0, | ||
| 109 | .image = *image, | ||
| 110 | .viewType = VK_IMAGE_VIEW_TYPE_2D, | ||
| 111 | .format = format, | ||
| 112 | .components = | ||
| 113 | { | ||
| 114 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 115 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 116 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 117 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 118 | }, | ||
| 119 | .subresourceRange = | ||
| 120 | { | ||
| 121 | .aspectMask = aspect_mask, | ||
| 122 | .baseMipLevel = 0, | ||
| 123 | .levelCount = 1, | ||
| 124 | .baseArrayLayer = 0, | ||
| 125 | .layerCount = 1, | ||
| 126 | }, | ||
| 127 | }); | ||
| 128 | } | ||
| 129 | |||
| 130 | VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { | ||
| 131 | return subrange_states[static_cast<std::size_t>(layer * image_num_levels) + | ||
| 132 | static_cast<std::size_t>(level)]; | ||
| 133 | } | ||
| 134 | |||
| 135 | } // namespace Vulkan \ No newline at end of file | ||
diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h deleted file mode 100644 index 287ab90ca..000000000 --- a/src/video_core/renderer_vulkan/vk_image.h +++ /dev/null | |||
| @@ -1,84 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 12 | |||
| 13 | namespace Vulkan { | ||
| 14 | |||
| 15 | class VKDevice; | ||
| 16 | class VKScheduler; | ||
| 17 | |||
| 18 | class VKImage { | ||
| 19 | public: | ||
| 20 | explicit VKImage(const VKDevice& device_, VKScheduler& scheduler_, | ||
| 21 | const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_); | ||
| 22 | ~VKImage(); | ||
| 23 | |||
| 24 | /// Records in the passed command buffer an image transition and updates the state of the image. | ||
| 25 | void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | ||
| 26 | VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, | ||
| 27 | VkImageLayout new_layout); | ||
| 28 | |||
| 29 | /// Returns a view compatible with presentation; the image has to be 2D. | ||
| 30 | VkImageView GetPresentView() { | ||
| 31 | if (!present_view) { | ||
| 32 | CreatePresentView(); | ||
| 33 | } | ||
| 34 | return *present_view; | ||
| 35 | } | ||
| 36 | |||
| 37 | /// Returns the Vulkan image handle. | ||
| 38 | const vk::Image& GetHandle() const { | ||
| 39 | return image; | ||
| 40 | } | ||
| 41 | |||
| 42 | /// Returns the Vulkan format for this image. | ||
| 43 | VkFormat GetFormat() const { | ||
| 44 | return format; | ||
| 45 | } | ||
| 46 | |||
| 47 | /// Returns the Vulkan aspect mask. | ||
| 48 | VkImageAspectFlags GetAspectMask() const { | ||
| 49 | return aspect_mask; | ||
| 50 | } | ||
| 51 | |||
| 52 | private: | ||
| 53 | struct SubrangeState final { | ||
| 54 | VkAccessFlags access = 0; ///< Current access bits. | ||
| 55 | VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout. | ||
| 56 | }; | ||
| 57 | |||
| 58 | bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | ||
| 59 | VkAccessFlags new_access, VkImageLayout new_layout) noexcept; | ||
| 60 | |||
| 61 | /// Creates a presentation view. | ||
| 62 | void CreatePresentView(); | ||
| 63 | |||
| 64 | /// Returns the subrange state for a layer and level. | ||
| 65 | SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept; | ||
| 66 | |||
| 67 | const VKDevice& device; ///< Device handler. | ||
| 68 | VKScheduler& scheduler; ///< Device scheduler. | ||
| 69 | |||
| 70 | const VkFormat format; ///< Vulkan format. | ||
| 71 | const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask. | ||
| 72 | const u32 image_num_layers; ///< Number of layers. | ||
| 73 | const u32 image_num_levels; ///< Number of mipmap levels. | ||
| 74 | |||
| 75 | vk::Image image; ///< Image handle. | ||
| 76 | vk::ImageView present_view; ///< Image view compatible with presentation. | ||
| 77 | |||
| 78 | std::vector<VkImageMemoryBarrier> barriers; ///< Pool of barriers. | ||
| 79 | std::vector<SubrangeState> subrange_states; ///< Current subrange state. | ||
| 80 | |||
| 81 | bool state_diverged = false; ///< True when subresources mismatch in layout. | ||
| 82 | }; | ||
| 83 | |||
| 84 | } // namespace Vulkan | ||
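The deleted VKImage above tracked access flags and image layout per (layer, level) subresource, flattened into a single vector indexed as layer * num_levels + level (see GetSubrangeState). A standalone sketch of that flattening with illustrative types:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct SubrangeState {
        unsigned access = 0; // stand-in for VkAccessFlags
        int layout = 0;      // stand-in for VkImageLayout
    };

    class SubrangeTable {
    public:
        SubrangeTable(std::size_t num_layers, std::size_t num_levels_)
            : num_levels{num_levels_}, states(num_layers * num_levels_) {}

        SubrangeState& At(std::size_t layer, std::size_t level) {
            // Row-major layout: mip levels vary fastest within a layer.
            return states[layer * num_levels + level];
        }

    private:
        std::size_t num_levels;
        std::vector<SubrangeState> states;
    };

    int main() {
        SubrangeTable table(4, 3); // 4 array layers, 3 mip levels
        table.At(2, 1).access = 0x1000; // VK_ACCESS_TRANSFER_WRITE_BIT's value
        assert(table.At(2, 1).access == 0x1000);
    }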
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index ae26e558d..56ec5e380 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp | |||
| @@ -6,15 +6,15 @@ | |||
| 6 | #include <chrono> | 6 | #include <chrono> |
| 7 | 7 | ||
| 8 | #include "core/settings.h" | 8 | #include "core/settings.h" |
| 9 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | 9 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" |
| 11 | #include "video_core/renderer_vulkan/wrapper.h" | 10 | #include "video_core/vulkan_common/vulkan_device.h" |
| 11 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 12 | 12 | ||
| 13 | namespace Vulkan { | 13 | namespace Vulkan { |
| 14 | 14 | ||
| 15 | using namespace std::chrono_literals; | 15 | using namespace std::chrono_literals; |
| 16 | 16 | ||
| 17 | MasterSemaphore::MasterSemaphore(const VKDevice& device) { | 17 | MasterSemaphore::MasterSemaphore(const Device& device) { |
| 18 | static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{ | 18 | static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{ |
| 19 | .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR, | 19 | .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR, |
| 20 | .pNext = nullptr, | 20 | .pNext = nullptr, |
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 0e93706d7..f336f1862 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h | |||
| @@ -8,15 +8,15 @@ | |||
| 8 | #include <thread> | 8 | #include <thread> |
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/renderer_vulkan/wrapper.h" | 11 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 12 | 12 | ||
| 13 | namespace Vulkan { | 13 | namespace Vulkan { |
| 14 | 14 | ||
| 15 | class VKDevice; | 15 | class Device; |
| 16 | 16 | ||
| 17 | class MasterSemaphore { | 17 | class MasterSemaphore { |
| 18 | public: | 18 | public: |
| 19 | explicit MasterSemaphore(const VKDevice& device); | 19 | explicit MasterSemaphore(const Device& device); |
| 20 | ~MasterSemaphore(); | 20 | ~MasterSemaphore(); |
| 21 | 21 | ||
| 22 | /// Returns the current logical tick. | 22 | /// Returns the current logical tick. |
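MasterSemaphore sits on a Vulkan timeline semaphore, created through the VkSemaphoreTypeCreateInfoKHR chain visible in the .cpp hunk above. A minimal sketch of that setup using the equivalent core Vulkan 1.2 spellings; CreateTimeline is a hypothetical free function, not the class's actual code.

    #include <vulkan/vulkan.h>

    VkSemaphore CreateTimeline(VkDevice device) {
        const VkSemaphoreTypeCreateInfo type_ci{
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
            .pNext = nullptr,
            .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
            .initialValue = 0, // the logical tick starts at zero
        };
        const VkSemaphoreCreateInfo ci{
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
            .pNext = &type_ci, // chain the timeline type into the create info
            .flags = 0,
        };
        VkSemaphore semaphore = VK_NULL_HANDLE;
        vkCreateSemaphore(device, &ci, nullptr, &semaphore);
        return semaphore;
    }

Host code can then block on a tick with vkWaitSemaphores or poll it with vkGetSemaphoreCounterValue, which is what makes a single monotonically increasing counter usable in place of per-submission fences.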
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index be53d450f..a6abd0eee 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp | |||
| @@ -11,9 +11,9 @@ | |||
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| 14 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 14 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 16 | #include "video_core/renderer_vulkan/wrapper.h" | 15 | #include "video_core/vulkan_common/vulkan_device.h" |
| 16 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 17 | 17 | ||
| 18 | namespace Vulkan { | 18 | namespace Vulkan { |
| 19 | 19 | ||
| @@ -29,7 +29,7 @@ u64 GetAllocationChunkSize(u64 required_size) { | |||
| 29 | 29 | ||
| 30 | class VKMemoryAllocation final { | 30 | class VKMemoryAllocation final { |
| 31 | public: | 31 | public: |
| 32 | explicit VKMemoryAllocation(const VKDevice& device_, vk::DeviceMemory memory_, | 32 | explicit VKMemoryAllocation(const Device& device_, vk::DeviceMemory memory_, |
| 33 | VkMemoryPropertyFlags properties_, u64 allocation_size_, u32 type_) | 33 | VkMemoryPropertyFlags properties_, u64 allocation_size_, u32 type_) |
| 34 | : device{device_}, memory{std::move(memory_)}, properties{properties_}, | 34 | : device{device_}, memory{std::move(memory_)}, properties{properties_}, |
| 35 | allocation_size{allocation_size_}, shifted_type{ShiftType(type_)} {} | 35 | allocation_size{allocation_size_}, shifted_type{ShiftType(type_)} {} |
| @@ -104,7 +104,7 @@ private: | |||
| 104 | return std::nullopt; | 104 | return std::nullopt; |
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | const VKDevice& device; ///< Vulkan device. | 107 | const Device& device; ///< Vulkan device. |
| 108 | const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. | 108 | const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. |
| 109 | const VkMemoryPropertyFlags properties; ///< Vulkan properties. | 109 | const VkMemoryPropertyFlags properties; ///< Vulkan properties. |
| 110 | const u64 allocation_size; ///< Size of this allocation. | 110 | const u64 allocation_size; ///< Size of this allocation. |
| @@ -117,7 +117,7 @@ private: | |||
| 117 | std::vector<const VKMemoryCommitImpl*> commits; | 117 | std::vector<const VKMemoryCommitImpl*> commits; |
| 118 | }; | 118 | }; |
| 119 | 119 | ||
| 120 | VKMemoryManager::VKMemoryManager(const VKDevice& device_) | 120 | VKMemoryManager::VKMemoryManager(const Device& device_) |
| 121 | : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {} | 121 | : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {} |
| 122 | 122 | ||
| 123 | VKMemoryManager::~VKMemoryManager() = default; | 123 | VKMemoryManager::~VKMemoryManager() = default; |
| @@ -207,7 +207,7 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requi | |||
| 207 | return {}; | 207 | return {}; |
| 208 | } | 208 | } |
| 209 | 209 | ||
| 210 | VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_, | 210 | VKMemoryCommitImpl::VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_, |
| 211 | const vk::DeviceMemory& memory_, u64 begin_, u64 end_) | 211 | const vk::DeviceMemory& memory_, u64 begin_, u64 end_) |
| 212 | : device{device_}, memory{memory_}, interval{begin_, end_}, allocation{allocation_} {} | 212 | : device{device_}, memory{memory_}, interval{begin_, end_}, allocation{allocation_} {} |
| 213 | 213 | ||
| @@ -216,7 +216,7 @@ VKMemoryCommitImpl::~VKMemoryCommitImpl() { | |||
| 216 | } | 216 | } |
| 217 | 217 | ||
| 218 | MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { | 218 | MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { |
| 219 | return MemoryMap{this, memory.Map(interval.first + offset_, size)}; | 219 | return MemoryMap(this, std::span<u8>(memory.Map(interval.first + offset_, size), size)); |
| 220 | } | 220 | } |
| 221 | 221 | ||
| 222 | void VKMemoryCommitImpl::Unmap() const { | 222 | void VKMemoryCommitImpl::Unmap() const { |
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 39f903ec8..2452bca4e 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h | |||
| @@ -5,15 +5,16 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <span> | ||
| 8 | #include <utility> | 9 | #include <utility> |
| 9 | #include <vector> | 10 | #include <vector> |
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 11 | #include "video_core/renderer_vulkan/wrapper.h" | 12 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 12 | 13 | ||
| 13 | namespace Vulkan { | 14 | namespace Vulkan { |
| 14 | 15 | ||
| 16 | class Device; | ||
| 15 | class MemoryMap; | 17 | class MemoryMap; |
| 16 | class VKDevice; | ||
| 17 | class VKMemoryAllocation; | 18 | class VKMemoryAllocation; |
| 18 | class VKMemoryCommitImpl; | 19 | class VKMemoryCommitImpl; |
| 19 | 20 | ||
| @@ -21,7 +22,7 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>; | |||
| 21 | 22 | ||
| 22 | class VKMemoryManager final { | 23 | class VKMemoryManager final { |
| 23 | public: | 24 | public: |
| 24 | explicit VKMemoryManager(const VKDevice& device_); | 25 | explicit VKMemoryManager(const Device& device_); |
| 25 | VKMemoryManager(const VKMemoryManager&) = delete; | 26 | VKMemoryManager(const VKMemoryManager&) = delete; |
| 26 | ~VKMemoryManager(); | 27 | ~VKMemoryManager(); |
| 27 | 28 | ||
| @@ -48,7 +49,7 @@ private: | |||
| 48 | VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements, | 49 | VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements, |
| 49 | VkMemoryPropertyFlags wanted_properties); | 50 | VkMemoryPropertyFlags wanted_properties); |
| 50 | 51 | ||
| 51 | const VKDevice& device; ///< Device handler. | 52 | const Device& device; ///< Device handler. |
| 52 | const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. | 53 | const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. |
| 53 | std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations. | 54 | std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations. |
| 54 | }; | 55 | }; |
| @@ -58,7 +59,7 @@ class VKMemoryCommitImpl final { | |||
| 58 | friend MemoryMap; | 59 | friend MemoryMap; |
| 59 | 60 | ||
| 60 | public: | 61 | public: |
| 61 | explicit VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_, | 62 | explicit VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_, |
| 62 | const vk::DeviceMemory& memory_, u64 begin_, u64 end_); | 63 | const vk::DeviceMemory& memory_, u64 begin_, u64 end_); |
| 63 | ~VKMemoryCommitImpl(); | 64 | ~VKMemoryCommitImpl(); |
| 64 | 65 | ||
| @@ -84,7 +85,7 @@ private: | |||
| 84 | /// Unmaps memory. | 85 | /// Unmaps memory. |
| 85 | void Unmap() const; | 86 | void Unmap() const; |
| 86 | 87 | ||
| 87 | const VKDevice& device; ///< Vulkan device. | 88 | const Device& device; ///< Vulkan device. |
| 88 | const vk::DeviceMemory& memory; ///< Vulkan device memory handler. | 89 | const vk::DeviceMemory& memory; ///< Vulkan device memory handler. |
| 89 | std::pair<u64, u64> interval{}; ///< Interval where the commit exists. | 90 | std::pair<u64, u64> interval{}; ///< Interval where the commit exists. |
| 90 | VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. | 91 | VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. |
| @@ -93,8 +94,8 @@ private: | |||
| 93 | /// Holds ownership of a memory map. | 94 | /// Holds ownership of a memory map. |
| 94 | class MemoryMap final { | 95 | class MemoryMap final { |
| 95 | public: | 96 | public: |
| 96 | explicit MemoryMap(const VKMemoryCommitImpl* commit_, u8* address_) | 97 | explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span<u8> span_) |
| 97 | : commit{commit_}, address{address_} {} | 98 | : commit{commit_}, span{span_} {} |
| 98 | 99 | ||
| 99 | ~MemoryMap() { | 100 | ~MemoryMap() { |
| 100 | if (commit) { | 101 | if (commit) { |
| @@ -108,19 +109,24 @@ public: | |||
| 108 | commit = nullptr; | 109 | commit = nullptr; |
| 109 | } | 110 | } |
| 110 | 111 | ||
| 112 | /// Returns a span to the memory map. | ||
| 113 | [[nodiscard]] std::span<u8> Span() const noexcept { | ||
| 114 | return span; | ||
| 115 | } | ||
| 116 | |||
| 111 | /// Returns the address of the memory map. | 117 | /// Returns the address of the memory map. |
| 112 | u8* GetAddress() const { | 118 | [[nodiscard]] u8* Address() const noexcept { |
| 113 | return address; | 119 | return span.data(); |
| 114 | } | 120 | } |
| 115 | 121 | ||
| 116 | /// Returns the address of the memory map. | 122 | /// Returns the address of the memory map. |
| 117 | operator u8*() const { | 123 | [[nodiscard]] operator u8*() const noexcept { |
| 118 | return address; | 124 | return span.data(); |
| 119 | } | 125 | } |
| 120 | 126 | ||
| 121 | private: | 127 | private: |
| 122 | const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. | 128 | const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. |
| 123 | u8* address{}; ///< Address to the mapped memory. | 129 | std::span<u8> span; ///< Span of the mapped memory. |
| 124 | }; | 130 | }; |
| 125 | 131 | ||
| 126 | } // namespace Vulkan | 132 | } // namespace Vulkan |
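MemoryMap above owns the mapping and unmaps it on destruction unless Release() is called. A self-contained sketch of that ownership pattern, with hypothetical ScopedMap and Commit types standing in for MemoryMap and VKMemoryCommitImpl:

    // Minimal RAII ownership sketch: unmap on destruction unless released.
    #include <cstdint>
    #include <span>
    #include <vector>

    using u8 = std::uint8_t;

    struct Commit {
        void Unmap() const {} // placeholder for VKMemoryCommitImpl::Unmap
    };

    class ScopedMap {
    public:
        ScopedMap(const Commit* commit_, std::span<u8> span_) : commit{commit_}, span{span_} {}
        ~ScopedMap() {
            if (commit) {
                commit->Unmap(); // only the still-owning object unmaps
            }
        }
        void Release() { commit = nullptr; } // keep the mapping alive past this object
        std::span<u8> Span() const noexcept { return span; }

    private:
        const Commit* commit{};
        std::span<u8> span;
    };

    int main() {
        Commit commit;
        std::vector<u8> storage(64);
        {
            ScopedMap map(&commit, std::span<u8>(storage.data(), storage.size()));
            map.Span()[0] = 1; // use the mapping; Unmap runs at scope exit
        }
        return 0;
    }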
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3fb264d03..02282e36f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "common/bit_cast.h" | 10 | #include "common/bit_cast.h" |
| 11 | #include "common/cityhash.h" | ||
| 11 | #include "common/microprofile.h" | 12 | #include "common/microprofile.h" |
| 12 | #include "core/core.h" | 13 | #include "core/core.h" |
| 13 | #include "core/memory.h" | 14 | #include "core/memory.h" |
| @@ -18,18 +19,17 @@ | |||
| 18 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 19 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 19 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 20 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 20 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 21 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 21 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 22 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 22 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 23 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 23 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 24 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 24 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 25 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 25 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 27 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 26 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 28 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 29 | #include "video_core/shader/compiler_settings.h" | 27 | #include "video_core/shader/compiler_settings.h" |
| 30 | #include "video_core/shader/memory_util.h" | 28 | #include "video_core/shader/memory_util.h" |
| 31 | #include "video_core/shader_cache.h" | 29 | #include "video_core/shader_cache.h" |
| 32 | #include "video_core/shader_notify.h" | 30 | #include "video_core/shader_notify.h" |
| 31 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 32 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 33 | 33 | ||
| 34 | namespace Vulkan { | 34 | namespace Vulkan { |
| 35 | 35 | ||
| @@ -52,7 +52,9 @@ constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEX | |||
| 52 | constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; | 52 | constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; |
| 53 | 53 | ||
| 54 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ | 54 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ |
| 55 | VideoCommon::Shader::CompileDepth::FullDecompile}; | 55 | .depth = VideoCommon::Shader::CompileDepth::FullDecompile, |
| 56 | .disable_else_derivation = true, | ||
| 57 | }; | ||
| 56 | 58 | ||
| 57 | constexpr std::size_t GetStageFromProgram(std::size_t program) { | 59 | constexpr std::size_t GetStageFromProgram(std::size_t program) { |
| 58 | return program == 0 ? 0 : program - 1; | 60 | return program == 0 ? 0 : program - 1; |
| @@ -147,14 +149,13 @@ Shader::~Shader() = default; | |||
| 147 | VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | 149 | VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, |
| 148 | Tegra::Engines::Maxwell3D& maxwell3d_, | 150 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| 149 | Tegra::Engines::KeplerCompute& kepler_compute_, | 151 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 150 | Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, | 152 | Tegra::MemoryManager& gpu_memory_, const Device& device_, |
| 151 | VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, | 153 | VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, |
| 152 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 154 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| 153 | VKRenderPassCache& renderpass_cache_) | 155 | : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, |
| 154 | : ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, | 156 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, |
| 155 | gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, | 157 | scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ |
| 156 | descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, | 158 | update_descriptor_queue_} {} |
| 157 | renderpass_cache{renderpass_cache_} {} | ||
| 158 | 159 | ||
| 159 | VKPipelineCache::~VKPipelineCache() = default; | 160 | VKPipelineCache::~VKPipelineCache() = default; |
| 160 | 161 | ||
| @@ -199,7 +200,8 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | |||
| 199 | } | 200 | } |
| 200 | 201 | ||
| 201 | VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( | 202 | VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( |
| 202 | const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { | 203 | const GraphicsPipelineCacheKey& key, u32 num_color_buffers, |
| 204 | VideoCommon::Shader::AsyncShaders& async_shaders) { | ||
| 203 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | 205 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); |
| 204 | 206 | ||
| 205 | if (last_graphics_pipeline && last_graphics_key == key) { | 207 | if (last_graphics_pipeline && last_graphics_key == key) { |
| @@ -215,8 +217,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( | |||
| 215 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | 217 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); |
| 216 | const auto [program, bindings] = DecompileShaders(key.fixed_state); | 218 | const auto [program, bindings] = DecompileShaders(key.fixed_state); |
| 217 | async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, | 219 | async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, |
| 218 | update_descriptor_queue, renderpass_cache, bindings, | 220 | update_descriptor_queue, bindings, program, key, |
| 219 | program, key); | 221 | num_color_buffers); |
| 220 | } | 222 | } |
| 221 | last_graphics_pipeline = pair->second.get(); | 223 | last_graphics_pipeline = pair->second.get(); |
| 222 | return last_graphics_pipeline; | 224 | return last_graphics_pipeline; |
| @@ -229,8 +231,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( | |||
| 229 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | 231 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); |
| 230 | const auto [program, bindings] = DecompileShaders(key.fixed_state); | 232 | const auto [program, bindings] = DecompileShaders(key.fixed_state); |
| 231 | entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, | 233 | entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, |
| 232 | update_descriptor_queue, renderpass_cache, key, | 234 | update_descriptor_queue, key, bindings, |
| 233 | bindings, program); | 235 | program, num_color_buffers); |
| 234 | gpu.ShaderNotify().MarkShaderComplete(); | 236 | gpu.ShaderNotify().MarkShaderComplete(); |
| 235 | } | 237 | } |
| 236 | last_graphics_pipeline = entry.get(); | 238 | last_graphics_pipeline = entry.get(); |
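GetGraphicsPipeline checks a cached last_graphics_key/last_graphics_pipeline pair before consulting the hash map, since consecutive draws tend to reuse the same state. A generic sketch of that single-entry fast path; Key, KeyHash, and Pipeline are illustrative stand-ins:

    #include <cstddef>
    #include <memory>
    #include <unordered_map>

    struct Key {
        std::size_t hash;
        bool operator==(const Key&) const = default;
    };
    struct KeyHash {
        std::size_t operator()(const Key& k) const noexcept { return k.hash; }
    };
    struct Pipeline {};

    class PipelineCache {
    public:
        Pipeline* Get(const Key& key) {
            // One comparison short-circuits the map lookup on the hot path.
            if (last_pipeline && last_key == key) {
                return last_pipeline;
            }
            auto& entry = entries[key];
            if (!entry) {
                entry = std::make_unique<Pipeline>(); // stand-in for compilation
            }
            last_key = key;
            last_pipeline = entry.get();
            return last_pipeline;
        }

    private:
        Key last_key{};
        Pipeline* last_pipeline = nullptr;
        std::unordered_map<Key, std::unique_ptr<Pipeline>, KeyHash> entries;
    };

    int main() {
        PipelineCache cache;
        Pipeline* a = cache.Get({.hash = 1});
        return cache.Get({.hash = 1}) == a ? 0 : 1; // second call takes the fast path
    }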
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 9e1f8fcbb..89d635a3d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -19,14 +19,13 @@ | |||
| 19 | #include "video_core/engines/maxwell_3d.h" | 19 | #include "video_core/engines/maxwell_3d.h" |
| 20 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 20 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 21 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 21 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 22 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 23 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 22 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 24 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 25 | #include "video_core/shader/async_shaders.h" | 23 | #include "video_core/shader/async_shaders.h" |
| 26 | #include "video_core/shader/memory_util.h" | 24 | #include "video_core/shader/memory_util.h" |
| 27 | #include "video_core/shader/registry.h" | 25 | #include "video_core/shader/registry.h" |
| 28 | #include "video_core/shader/shader_ir.h" | 26 | #include "video_core/shader/shader_ir.h" |
| 29 | #include "video_core/shader_cache.h" | 27 | #include "video_core/shader_cache.h" |
| 28 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 30 | 29 | ||
| 31 | namespace Core { | 30 | namespace Core { |
| 32 | class System; | 31 | class System; |
| @@ -34,10 +33,10 @@ class System; | |||
| 34 | 33 | ||
| 35 | namespace Vulkan { | 34 | namespace Vulkan { |
| 36 | 35 | ||
| 36 | class Device; | ||
| 37 | class RasterizerVulkan; | 37 | class RasterizerVulkan; |
| 38 | class VKComputePipeline; | 38 | class VKComputePipeline; |
| 39 | class VKDescriptorPool; | 39 | class VKDescriptorPool; |
| 40 | class VKDevice; | ||
| 41 | class VKScheduler; | 40 | class VKScheduler; |
| 42 | class VKUpdateDescriptorQueue; | 41 | class VKUpdateDescriptorQueue; |
| 43 | 42 | ||
| @@ -119,18 +118,18 @@ private: | |||
| 119 | 118 | ||
| 120 | class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { | 119 | class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { |
| 121 | public: | 120 | public: |
| 122 | explicit VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | 121 | explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, |
| 123 | Tegra::Engines::Maxwell3D& maxwell3d_, | 122 | Tegra::Engines::Maxwell3D& maxwell3d, |
| 124 | Tegra::Engines::KeplerCompute& kepler_compute_, | 123 | Tegra::Engines::KeplerCompute& kepler_compute, |
| 125 | Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, | 124 | Tegra::MemoryManager& gpu_memory, const Device& device, |
| 126 | VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, | 125 | VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, |
| 127 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 126 | VKUpdateDescriptorQueue& update_descriptor_queue); |
| 128 | VKRenderPassCache& renderpass_cache_); | ||
| 129 | ~VKPipelineCache() override; | 127 | ~VKPipelineCache() override; |
| 130 | 128 | ||
| 131 | std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); | 129 | std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); |
| 132 | 130 | ||
| 133 | VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, | 131 | VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, |
| 132 | u32 num_color_buffers, | ||
| 134 | VideoCommon::Shader::AsyncShaders& async_shaders); | 133 | VideoCommon::Shader::AsyncShaders& async_shaders); |
| 135 | 134 | ||
| 136 | VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); | 135 | VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); |
| @@ -149,11 +148,10 @@ private: | |||
| 149 | Tegra::Engines::KeplerCompute& kepler_compute; | 148 | Tegra::Engines::KeplerCompute& kepler_compute; |
| 150 | Tegra::MemoryManager& gpu_memory; | 149 | Tegra::MemoryManager& gpu_memory; |
| 151 | 150 | ||
| 152 | const VKDevice& device; | 151 | const Device& device; |
| 153 | VKScheduler& scheduler; | 152 | VKScheduler& scheduler; |
| 154 | VKDescriptorPool& descriptor_pool; | 153 | VKDescriptorPool& descriptor_pool; |
| 155 | VKUpdateDescriptorQueue& update_descriptor_queue; | 154 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 156 | VKRenderPassCache& renderpass_cache; | ||
| 157 | 155 | ||
| 158 | std::unique_ptr<Shader> null_shader; | 156 | std::unique_ptr<Shader> null_shader; |
| 159 | std::unique_ptr<Shader> null_kernel; | 157 | std::unique_ptr<Shader> null_kernel; |
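The header now forward-declares Device rather than pulling in its full definition. A single-file sketch of why a reference member only needs the declaration (in the real code the definition comes from vulkan_common/vulkan_device.h, included only in the .cpp):

    // Forward-declaration pattern: a class that merely stores a reference or
    // pointer needs just `class Device;`, which keeps header dependencies flat.
    class Device; // enough for references, pointers, and parameter types

    class PipelineCacheLike {
    public:
        explicit PipelineCacheLike(const Device& device_) : device{device_} {}

    private:
        const Device& device; // an incomplete type is fine here
    };

    // The full definition is only required where members are actually used,
    // normally in the .cpp file; here it simply appears later in the same TU.
    class Device {};

    int main() {
        Device device;
        PipelineCacheLike cache{device};
        return 0;
    }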
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 038760de3..7cadd5147 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -7,11 +7,11 @@ | |||
| 7 | #include <utility> | 7 | #include <utility> |
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 10 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 12 | #include "video_core/renderer_vulkan/vk_resource_pool.h" | 11 | #include "video_core/renderer_vulkan/vk_resource_pool.h" |
| 13 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 12 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 14 | #include "video_core/renderer_vulkan/wrapper.h" | 13 | #include "video_core/vulkan_common/vulkan_device.h" |
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 15 | 15 | ||
| 16 | namespace Vulkan { | 16 | namespace Vulkan { |
| 17 | 17 | ||
| @@ -27,7 +27,7 @@ constexpr VkQueryType GetTarget(QueryType type) { | |||
| 27 | 27 | ||
| 28 | } // Anonymous namespace | 28 | } // Anonymous namespace |
| 29 | 29 | ||
| 30 | QueryPool::QueryPool(const VKDevice& device_, VKScheduler& scheduler, QueryType type_) | 30 | QueryPool::QueryPool(const Device& device_, VKScheduler& scheduler, QueryType type_) |
| 31 | : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {} | 31 | : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {} |
| 32 | 32 | ||
| 33 | QueryPool::~QueryPool() = default; | 33 | QueryPool::~QueryPool() = default; |
| @@ -68,7 +68,7 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { | |||
| 68 | 68 | ||
| 69 | VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, | 69 | VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, |
| 70 | Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, | 70 | Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, |
| 71 | const VKDevice& device_, VKScheduler& scheduler_) | 71 | const Device& device_, VKScheduler& scheduler_) |
| 72 | : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_}, | 72 | : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_}, |
| 73 | query_pools{ | 73 | query_pools{ |
| 74 | QueryPool{device_, scheduler_, QueryType::SamplesPassed}, | 74 | QueryPool{device_, scheduler_, QueryType::SamplesPassed}, |
| @@ -96,9 +96,9 @@ void VKQueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) { | |||
| 96 | HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_, | 96 | HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_, |
| 97 | QueryType type_) | 97 | QueryType type_) |
| 98 | : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, | 98 | : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, |
| 99 | query{cache_.AllocateQuery(type_)}, tick{cache_.Scheduler().CurrentTick()} { | 99 | query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} { |
| 100 | const vk::Device* logical = &cache_.Device().GetLogical(); | 100 | const vk::Device* logical = &cache.GetDevice().GetLogical(); |
| 101 | cache_.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { | 101 | cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { |
| 102 | logical->ResetQueryPoolEXT(query.first, query.second, 1); | 102 | logical->ResetQueryPoolEXT(query.first, query.second, 1); |
| 103 | cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT); | 103 | cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT); |
| 104 | }); | 104 | }); |
| @@ -109,17 +109,17 @@ HostCounter::~HostCounter() { | |||
| 109 | } | 109 | } |
| 110 | 110 | ||
| 111 | void HostCounter::EndQuery() { | 111 | void HostCounter::EndQuery() { |
| 112 | cache.Scheduler().Record( | 112 | cache.GetScheduler().Record( |
| 113 | [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); }); | 113 | [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); }); |
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | u64 HostCounter::BlockingQuery() const { | 116 | u64 HostCounter::BlockingQuery() const { |
| 117 | if (tick >= cache.Scheduler().CurrentTick()) { | 117 | if (tick >= cache.GetScheduler().CurrentTick()) { |
| 118 | cache.Scheduler().Flush(); | 118 | cache.GetScheduler().Flush(); |
| 119 | } | 119 | } |
| 120 | 120 | ||
| 121 | u64 data; | 121 | u64 data; |
| 122 | const VkResult query_result = cache.Device().GetLogical().GetQueryResults( | 122 | const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( |
| 123 | query.first, query.second, 1, sizeof(data), &data, sizeof(data), | 123 | query.first, query.second, 1, sizeof(data), &data, sizeof(data), |
| 124 | VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); | 124 | VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); |
| 125 | 125 | ||
| @@ -127,7 +127,7 @@ u64 HostCounter::BlockingQuery() const { | |||
| 127 | case VK_SUCCESS: | 127 | case VK_SUCCESS: |
| 128 | return data; | 128 | return data; |
| 129 | case VK_ERROR_DEVICE_LOST: | 129 | case VK_ERROR_DEVICE_LOST: |
| 130 | cache.Device().ReportLoss(); | 130 | cache.GetDevice().ReportLoss(); |
| 131 | [[fallthrough]]; | 131 | [[fallthrough]]; |
| 132 | default: | 132 | default: |
| 133 | throw vk::Exception(query_result); | 133 | throw vk::Exception(query_result); |
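BlockingQuery flushes the scheduler if the query's tick has not been submitted yet, then reads the result with the WAIT flag. A reduced sketch written against the raw Vulkan C API; it compiles against the Vulkan headers but assumes valid device/pool handles and an ended query, and it collapses yuzu's ReportLoss/vk::Exception handling into plain exceptions:

    #include <cstdint>
    #include <stdexcept>
    #include <vulkan/vulkan.h>

    std::uint64_t BlockingQuery(VkDevice device, VkQueryPool pool, std::uint32_t index) {
        std::uint64_t data = 0;
        // WAIT blocks until the GPU has written the result; 64_BIT selects the
        // wide result format so long-running counters cannot overflow 32 bits.
        const VkResult result = vkGetQueryPoolResults(
            device, pool, index, 1, sizeof(data), &data, sizeof(data),
            VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
        switch (result) {
        case VK_SUCCESS:
            return data;
        case VK_ERROR_DEVICE_LOST:
            // The real code reports the loss to the device wrapper first,
            // then rethrows through vk::Exception.
            throw std::runtime_error("Vulkan device lost while reading a query");
        default:
            throw std::runtime_error("vkGetQueryPoolResults failed");
        }
    }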
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index 837fe9ebf..7190946b9 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/query_cache.h" | 13 | #include "video_core/query_cache.h" |
| 14 | #include "video_core/renderer_vulkan/vk_resource_pool.h" | 14 | #include "video_core/renderer_vulkan/vk_resource_pool.h" |
| 15 | #include "video_core/renderer_vulkan/wrapper.h" | 15 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 16 | 16 | ||
| 17 | namespace VideoCore { | 17 | namespace VideoCore { |
| 18 | class RasterizerInterface; | 18 | class RasterizerInterface; |
| @@ -21,8 +21,8 @@ class RasterizerInterface; | |||
| 21 | namespace Vulkan { | 21 | namespace Vulkan { |
| 22 | 22 | ||
| 23 | class CachedQuery; | 23 | class CachedQuery; |
| 24 | class Device; | ||
| 24 | class HostCounter; | 25 | class HostCounter; |
| 25 | class VKDevice; | ||
| 26 | class VKQueryCache; | 26 | class VKQueryCache; |
| 27 | class VKScheduler; | 27 | class VKScheduler; |
| 28 | 28 | ||
| @@ -30,7 +30,7 @@ using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>; | |||
| 30 | 30 | ||
| 31 | class QueryPool final : public ResourcePool { | 31 | class QueryPool final : public ResourcePool { |
| 32 | public: | 32 | public: |
| 33 | explicit QueryPool(const VKDevice& device, VKScheduler& scheduler, VideoCore::QueryType type); | 33 | explicit QueryPool(const Device& device, VKScheduler& scheduler, VideoCore::QueryType type); |
| 34 | ~QueryPool() override; | 34 | ~QueryPool() override; |
| 35 | 35 | ||
| 36 | std::pair<VkQueryPool, u32> Commit(); | 36 | std::pair<VkQueryPool, u32> Commit(); |
| @@ -43,7 +43,7 @@ protected: | |||
| 43 | private: | 43 | private: |
| 44 | static constexpr std::size_t GROW_STEP = 512; | 44 | static constexpr std::size_t GROW_STEP = 512; |
| 45 | 45 | ||
| 46 | const VKDevice& device; | 46 | const Device& device; |
| 47 | const VideoCore::QueryType type; | 47 | const VideoCore::QueryType type; |
| 48 | 48 | ||
| 49 | std::vector<vk::QueryPool> pools; | 49 | std::vector<vk::QueryPool> pools; |
| @@ -55,23 +55,23 @@ class VKQueryCache final | |||
| 55 | public: | 55 | public: |
| 56 | explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, | 56 | explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, |
| 57 | Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, | 57 | Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, |
| 58 | const VKDevice& device_, VKScheduler& scheduler_); | 58 | const Device& device_, VKScheduler& scheduler_); |
| 59 | ~VKQueryCache(); | 59 | ~VKQueryCache(); |
| 60 | 60 | ||
| 61 | std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); | 61 | std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); |
| 62 | 62 | ||
| 63 | void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query); | 63 | void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query); |
| 64 | 64 | ||
| 65 | const VKDevice& Device() const noexcept { | 65 | const Device& GetDevice() const noexcept { |
| 66 | return device; | 66 | return device; |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | VKScheduler& Scheduler() const noexcept { | 69 | VKScheduler& GetScheduler() const noexcept { |
| 70 | return scheduler; | 70 | return scheduler; |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | private: | 73 | private: |
| 74 | const VKDevice& device; | 74 | const Device& device; |
| 75 | VKScheduler& scheduler; | 75 | VKScheduler& scheduler; |
| 76 | std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; | 76 | std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; |
| 77 | }; | 77 | }; |
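QueryPool grows through ResourcePool in fixed batches (GROW_STEP = 512). A toy sketch of that batch-growth allocation strategy; the real pool also recycles freed indices, which is omitted here:

    #include <cstddef>
    #include <vector>

    // Resources are created in fixed-size batches and handed out by index, so
    // a burst of queries triggers one batch allocation instead of many small ones.
    class IndexPool {
    public:
        explicit IndexPool(std::size_t grow_step_) : grow_step{grow_step_} {}

        std::size_t Commit() {
            if (next == capacity) {
                Allocate(); // exhausted: grow by exactly one batch
            }
            return next++;
        }

        std::size_t Capacity() const { return capacity; }

    private:
        void Allocate() {
            capacity += grow_step; // a real pool would create grow_step query slots here
        }

        std::size_t grow_step;
        std::size_t next = 0;
        std::size_t capacity = 0;
    };

    int main() {
        IndexPool pool(512); // mirrors GROW_STEP = 512 in the diff
        for (int i = 0; i < 600; ++i) {
            pool.Commit();
        }
        return pool.Capacity() == 1024 ? 0 : 1; // two batches cover 600 commits
    }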
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f93986aab..93fbea510 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "core/settings.h" | 19 | #include "core/settings.h" |
| 20 | #include "video_core/engines/kepler_compute.h" | 20 | #include "video_core/engines/kepler_compute.h" |
| 21 | #include "video_core/engines/maxwell_3d.h" | 21 | #include "video_core/engines/maxwell_3d.h" |
| 22 | #include "video_core/renderer_vulkan/blit_image.h" | ||
| 22 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 23 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 23 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 24 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 24 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 25 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| @@ -26,23 +27,24 @@ | |||
| 26 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 27 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 27 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 28 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 28 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 29 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 29 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 30 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 30 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 31 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 31 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 32 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 32 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 33 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 34 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | ||
| 35 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 33 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 36 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 34 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 37 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | 35 | #include "video_core/renderer_vulkan/vk_state_tracker.h" |
| 38 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 36 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 39 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 37 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 40 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 41 | #include "video_core/shader_cache.h" | 38 | #include "video_core/shader_cache.h" |
| 39 | #include "video_core/texture_cache/texture_cache.h" | ||
| 40 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 41 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 42 | 42 | ||
| 43 | namespace Vulkan { | 43 | namespace Vulkan { |
| 44 | 44 | ||
| 45 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 45 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 46 | using VideoCommon::ImageViewId; | ||
| 47 | using VideoCommon::ImageViewType; | ||
| 46 | 48 | ||
| 47 | MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); | 49 | MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); |
| 48 | MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); | 50 | MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); |
| @@ -58,9 +60,9 @@ MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192 | |||
| 58 | 60 | ||
| 59 | namespace { | 61 | namespace { |
| 60 | 62 | ||
| 61 | constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); | 63 | constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); |
| 62 | 64 | ||
| 63 | VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { | 65 | VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { |
| 64 | const auto& src = regs.viewport_transform[index]; | 66 | const auto& src = regs.viewport_transform[index]; |
| 65 | const float width = src.scale_x * 2.0f; | 67 | const float width = src.scale_x * 2.0f; |
| 66 | const float height = src.scale_y * 2.0f; | 68 | const float height = src.scale_y * 2.0f; |
| @@ -83,7 +85,7 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si | |||
| 83 | return viewport; | 85 | return viewport; |
| 84 | } | 86 | } |
| 85 | 87 | ||
| 86 | VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { | 88 | VkRect2D GetScissorState(const Maxwell& regs, size_t index) { |
| 87 | const auto& src = regs.scissor_test[index]; | 89 | const auto& src = regs.scissor_test[index]; |
| 88 | VkRect2D scissor; | 90 | VkRect2D scissor; |
| 89 | if (src.enable) { | 91 | if (src.enable) { |
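GetViewportState converts Maxwell's viewport transform, stored as a center (translate) plus half-extents (scale), into an origin-plus-extent rectangle; the hunk above shows the width/height half. A standalone sketch of the full conversion, with a plain struct standing in for VkViewport (the center-minus-half-extent origin is an assumption drawn from the surrounding code):

    #include <cstdio>

    struct ViewportTransform {
        float scale_x, scale_y;         // half width / half height
        float translate_x, translate_y; // center of the viewport
    };

    struct Viewport {
        float x, y, width, height;
    };

    Viewport ToViewport(const ViewportTransform& src) {
        const float width = src.scale_x * 2.0f; // full extent from half-extent
        const float height = src.scale_y * 2.0f;
        return Viewport{
            .x = src.translate_x - src.scale_x, // center minus half-extent = origin
            .y = src.translate_y - src.scale_y,
            .width = width,
            .height = height,
        };
    }

    int main() {
        const Viewport vp = ToViewport({.scale_x = 640.0f, .scale_y = 360.0f,
                                        .translate_x = 640.0f, .translate_y = 360.0f});
        std::printf("%.0fx%.0f at (%.0f, %.0f)\n", vp.width, vp.height, vp.x, vp.y);
        return 0;
    }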
| @@ -103,98 +105,122 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { | |||
| 103 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( | 105 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( |
| 104 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { | 106 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { |
| 105 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; | 107 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; |
| 106 | for (std::size_t i = 0; i < std::size(addresses); ++i) { | 108 | for (size_t i = 0; i < std::size(addresses); ++i) { |
| 107 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; | 109 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; |
| 108 | } | 110 | } |
| 109 | return addresses; | 111 | return addresses; |
| 110 | } | 112 | } |
| 111 | 113 | ||
| 112 | void TransitionImages(const std::vector<ImageView>& views, VkPipelineStageFlags pipeline_stage, | 114 | struct TextureHandle { |
| 113 | VkAccessFlags access) { | 115 | constexpr TextureHandle(u32 data, bool via_header_index) { |
| 114 | for (auto& [view, layout] : views) { | 116 | const Tegra::Texture::TextureHandle handle{data}; |
| 115 | view->Transition(*layout, pipeline_stage, access); | 117 | image = handle.tic_id; |
| 118 | sampler = via_header_index ? image : handle.tsc_id.Value(); | ||
| 116 | } | 119 | } |
| 117 | } | 120 | |
| 121 | u32 image; | ||
| 122 | u32 sampler; | ||
| 123 | }; | ||
| 118 | 124 | ||
| 119 | template <typename Engine, typename Entry> | 125 | template <typename Engine, typename Entry> |
| 120 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | 126 | TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, |
| 121 | std::size_t stage, std::size_t index = 0) { | 127 | size_t stage, size_t index = 0) { |
| 122 | const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); | 128 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage); |
| 123 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | 129 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { |
| 124 | if (entry.is_separated) { | 130 | if (entry.is_separated) { |
| 125 | const u32 buffer_1 = entry.buffer; | 131 | const u32 buffer_1 = entry.buffer; |
| 126 | const u32 buffer_2 = entry.secondary_buffer; | 132 | const u32 buffer_2 = entry.secondary_buffer; |
| 127 | const u32 offset_1 = entry.offset; | 133 | const u32 offset_1 = entry.offset; |
| 128 | const u32 offset_2 = entry.secondary_offset; | 134 | const u32 offset_2 = entry.secondary_offset; |
| 129 | const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1); | 135 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); |
| 130 | const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2); | 136 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); |
| 131 | return engine.GetTextureInfo(Tegra::Texture::TextureHandle{handle_1 | handle_2}); | 137 | return TextureHandle(handle_1 | handle_2, via_header_index); |
| 132 | } | 138 | } |
| 133 | } | 139 | } |
| 134 | if (entry.is_bindless) { | 140 | if (entry.is_bindless) { |
| 135 | const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); | 141 | const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); |
| 136 | return engine.GetTextureInfo(Tegra::Texture::TextureHandle{tex_handle}); | 142 | return TextureHandle(raw, via_header_index); |
| 137 | } | ||
| 138 | const auto& gpu_profile = engine.AccessGuestDriverProfile(); | ||
| 139 | const u32 entry_offset = static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); | ||
| 140 | const u32 offset = entry.offset + entry_offset; | ||
| 141 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { | ||
| 142 | return engine.GetStageTexture(stage_type, offset); | ||
| 143 | } else { | ||
| 144 | return engine.GetTexture(offset); | ||
| 145 | } | ||
| 146 | } | ||
| 147 | |||
| 148 | /// @brief Determine if an attachment to be updated has to preserve contents | ||
| 149 | /// @param is_clear True when a clear is being executed | ||
| 150 | /// @param regs 3D registers | ||
| 151 | /// @return True when the contents have to be preserved | ||
| 152 | bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) { | ||
| 153 | if (!is_clear) { | ||
| 154 | return true; | ||
| 155 | } | ||
| 156 | // First we have to make sure all clear masks are enabled. | ||
| 157 | if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B || | ||
| 158 | !regs.clear_buffers.A) { | ||
| 159 | return true; | ||
| 160 | } | ||
| 161 | // If scissors are disabled, the whole screen is cleared | ||
| 162 | if (!regs.clear_flags.scissor) { | ||
| 163 | return false; | ||
| 164 | } | 143 | } |
| 165 | // Then we have to confirm scissor testing clears the whole image | 144 | const u32 buffer = engine.GetBoundBuffer(); |
| 166 | const std::size_t index = regs.clear_buffers.RT; | 145 | const u64 offset = (entry.offset + index) * sizeof(u32); |
| 167 | const auto& scissor = regs.scissor_test[0]; | 146 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); |
| 168 | return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width || | ||
| 169 | scissor.max_y < regs.rt[index].height; | ||
| 170 | } | 147 | } |
| 171 | 148 | ||
| 172 | /// @brief Determine if an attachment to be updated has to preserve contents | 149 | template <size_t N> |
| 173 | /// @param is_clear True when a clear is being executed | ||
| 174 | /// @param regs 3D registers | ||
| 175 | /// @return True when the contents have to be preserved | ||
| 176 | bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) { | ||
| 177 | // If we are not clearing, the contents have to be preserved | ||
| 178 | if (!is_clear) { | ||
| 179 | return true; | ||
| 180 | } | ||
| 181 | // For depth stencil clears we only have to confirm scissor test covers the whole image | ||
| 182 | if (!regs.clear_flags.scissor) { | ||
| 183 | return false; | ||
| 184 | } | ||
| 185 | // Make sure the clear cover the whole image | ||
| 186 | const auto& scissor = regs.scissor_test[0]; | ||
| 187 | return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width || | ||
| 188 | scissor.max_y < regs.zeta_height; | ||
| 189 | } | ||
| 190 | |||
| 191 | template <std::size_t N> | ||
| 192 | std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { | 150 | std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { |
| 193 | std::array<VkDeviceSize, N> expanded; | 151 | std::array<VkDeviceSize, N> expanded; |
| 194 | std::copy(strides.begin(), strides.end(), expanded.begin()); | 152 | std::copy(strides.begin(), strides.end(), expanded.begin()); |
| 195 | return expanded; | 153 | return expanded; |
| 196 | } | 154 | } |
| 197 | 155 | ||
| 156 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | ||
| 157 | if (entry.is_buffer) { | ||
| 158 | return ImageViewType::e2D; | ||
| 159 | } | ||
| 160 | switch (entry.type) { | ||
| 161 | case Tegra::Shader::TextureType::Texture1D: | ||
| 162 | return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; | ||
| 163 | case Tegra::Shader::TextureType::Texture2D: | ||
| 164 | return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; | ||
| 165 | case Tegra::Shader::TextureType::Texture3D: | ||
| 166 | return ImageViewType::e3D; | ||
| 167 | case Tegra::Shader::TextureType::TextureCube: | ||
| 168 | return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; | ||
| 169 | } | ||
| 170 | UNREACHABLE(); | ||
| 171 | return ImageViewType::e2D; | ||
| 172 | } | ||
| 173 | |||
| 174 | ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { | ||
| 175 | switch (entry.type) { | ||
| 176 | case Tegra::Shader::ImageType::Texture1D: | ||
| 177 | return ImageViewType::e1D; | ||
| 178 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 179 | return ImageViewType::e1DArray; | ||
| 180 | case Tegra::Shader::ImageType::Texture2D: | ||
| 181 | return ImageViewType::e2D; | ||
| 182 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 183 | return ImageViewType::e2DArray; | ||
| 184 | case Tegra::Shader::ImageType::Texture3D: | ||
| 185 | return ImageViewType::e3D; | ||
| 186 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 187 | return ImageViewType::Buffer; | ||
| 188 | } | ||
| 189 | UNREACHABLE(); | ||
| 190 | return ImageViewType::e2D; | ||
| 191 | } | ||
| 192 | |||
| 193 | void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, | ||
| 194 | VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 195 | ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { | ||
| 196 | for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { | ||
| 197 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 198 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 199 | update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); | ||
| 200 | } | ||
| 201 | for (const auto& entry : entries.samplers) { | ||
| 202 | for (size_t i = 0; i < entry.size; ++i) { | ||
| 203 | const VkSampler sampler = *sampler_ptr++; | ||
| 204 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 205 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 206 | const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); | ||
| 207 | update_descriptor_queue.AddSampledImage(handle, sampler); | ||
| 208 | } | ||
| 209 | } | ||
| 210 | for ([[maybe_unused]] const auto& entry : entries.storage_texels) { | ||
| 211 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 212 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 213 | update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); | ||
| 214 | } | ||
| 215 | for (const auto& entry : entries.images) { | ||
| 216 | // TODO: Mark as modified | ||
| 217 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 218 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 219 | const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); | ||
| 220 | update_descriptor_queue.AddImage(handle); | ||
| 221 | } | ||
| 222 | } | ||
| 223 | |||
| 198 | } // Anonymous namespace | 224 | } // Anonymous namespace |
| 199 | 225 | ||
| 200 | class BufferBindings final { | 226 | class BufferBindings final { |
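The new TextureHandle struct splits a packed Maxwell handle into a TIC (image descriptor) index and a TSC (sampler descriptor) index, reusing the TIC index as the sampler index when via_header_index is set. A standalone sketch using the 20/12 bit split that yuzu's Tegra::Texture::TextureHandle bitfields describe:

    #include <cstdint>

    struct TextureHandle {
        std::uint32_t image;
        std::uint32_t sampler;
    };

    constexpr TextureHandle Decode(std::uint32_t data, bool via_header_index) {
        const std::uint32_t tic_id = data & 0xFFFFF;       // bits [0, 20): TIC index
        const std::uint32_t tsc_id = (data >> 20) & 0xFFF; // bits [20, 32): TSC index
        return TextureHandle{
            .image = tic_id,
            // With header indexing, one index addresses both descriptor tables.
            .sampler = via_header_index ? tic_id : tsc_id,
        };
    }

    static_assert(Decode(0x00300002, false).image == 2);
    static_assert(Decode(0x00300002, false).sampler == 3);
    static_assert(Decode(0x00300002, true).sampler == 2);

    int main() {
        return 0;
    }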
| @@ -213,7 +239,7 @@ public: | |||
| 213 | index.type = type; | 239 | index.type = type; |
| 214 | } | 240 | } |
| 215 | 241 | ||
| 216 | void Bind(const VKDevice& device, VKScheduler& scheduler) const { | 242 | void Bind(const Device& device, VKScheduler& scheduler) const { |
| 217 | // Use this large switch case to avoid dispatching more memory in the record lambda than | 243 | // Use this large switch case to avoid dispatching more memory in the record lambda than |
| 218 | // what we need. It looks horrible, but it's the best we can do on standard C++. | 244 | // what we need. It looks horrible, but it's the best we can do on standard C++. |
| 219 | switch (vertex.num_buffers) { | 245 | switch (vertex.num_buffers) { |
| @@ -290,7 +316,7 @@ public: | |||
| 290 | private: | 316 | private: |
| 291 | // Some of these fields are intentionally left uninitialized to avoid initializing them twice. | 317 | // Some of these fields are intentionally left uninitialized to avoid initializing them twice. |
| 292 | struct { | 318 | struct { |
| 293 | std::size_t num_buffers = 0; | 319 | size_t num_buffers = 0; |
| 294 | std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; | 320 | std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; |
| 295 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; | 321 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; |
| 296 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; | 322 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; |
| @@ -303,8 +329,8 @@ private: | |||
| 303 | VkIndexType type; | 329 | VkIndexType type; |
| 304 | } index; | 330 | } index; |
| 305 | 331 | ||
| 306 | template <std::size_t N> | 332 | template <size_t N> |
| 307 | void BindStatic(const VKDevice& device, VKScheduler& scheduler) const { | 333 | void BindStatic(const Device& device, VKScheduler& scheduler) const { |
| 308 | if (device.IsExtExtendedDynamicStateSupported()) { | 334 | if (device.IsExtExtendedDynamicStateSupported()) { |
| 309 | if (index.buffer) { | 335 | if (index.buffer) { |
| 310 | BindStatic<N, true, true>(scheduler); | 336 | BindStatic<N, true, true>(scheduler); |
| @@ -320,7 +346,7 @@ private: | |||
| 320 | } | 346 | } |
| 321 | } | 347 | } |
| 322 | 348 | ||
| 323 | template <std::size_t N, bool is_indexed, bool has_extended_dynamic_state> | 349 | template <size_t N, bool is_indexed, bool has_extended_dynamic_state> |
| 324 | void BindStatic(VKScheduler& scheduler) const { | 350 | void BindStatic(VKScheduler& scheduler) const { |
| 325 | static_assert(N <= Maxwell::NumVertexArrays); | 351 | static_assert(N <= Maxwell::NumVertexArrays); |
| 326 | if constexpr (N == 0) { | 352 | if constexpr (N == 0) { |
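BindStatic turns the runtime vertex-buffer count into a template parameter via a switch, exactly so the record lambda copies only N elements; the comment in the hunk above calls this out. A compact sketch of the runtime-to-compile-time dispatch pattern:

    #include <array>
    #include <cstddef>
    #include <cstdio>

    template <std::size_t N>
    void BindN(const std::array<int, 8>& buffers) {
        static_assert(N <= 8);
        // With N fixed at compile time, the loop can unroll and the state a
        // deferred lambda would copy is exactly N elements, not the worst case.
        for (std::size_t i = 0; i < N; ++i) {
            std::printf("bind buffer %d\n", buffers[i]);
        }
    }

    void Bind(std::size_t count, const std::array<int, 8>& buffers) {
        switch (count) {
        case 0: return BindN<0>(buffers);
        case 1: return BindN<1>(buffers);
        case 2: return BindN<2>(buffers);
        case 3: return BindN<3>(buffers);
        case 4: return BindN<4>(buffers);
        default: return BindN<8>(buffers); // clamp; the real code enumerates every case
        }
    }

    int main() {
        Bind(2, {10, 11, 12, 13, 14, 15, 16, 17});
        return 0;
    }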
| @@ -383,22 +409,25 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { | |||
| 383 | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 409 | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 384 | Tegra::MemoryManager& gpu_memory_, | 410 | Tegra::MemoryManager& gpu_memory_, |
| 385 | Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_, | 411 | Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_, |
| 386 | const VKDevice& device_, VKMemoryManager& memory_manager_, | 412 | const Device& device_, VKMemoryManager& memory_manager_, |
| 387 | StateTracker& state_tracker_, VKScheduler& scheduler_) | 413 | StateTracker& state_tracker_, VKScheduler& scheduler_) |
| 388 | : RasterizerAccelerated(cpu_memory_), gpu(gpu_), gpu_memory(gpu_memory_), | 414 | : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, |
| 389 | maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_), | 415 | gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, |
| 390 | device(device_), memory_manager(memory_manager_), state_tracker(state_tracker_), | 416 | screen_info{screen_info_}, device{device_}, memory_manager{memory_manager_}, |
| 391 | scheduler(scheduler_), staging_pool(device, memory_manager, scheduler), | 417 | state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), |
| 392 | descriptor_pool(device, scheduler_), update_descriptor_queue(device, scheduler), | 418 | staging_pool(device, memory_manager, scheduler), descriptor_pool(device, scheduler), |
| 393 | renderpass_cache(device), | 419 | update_descriptor_queue(device, scheduler), |
| 420 | blit_image(device, scheduler, state_tracker, descriptor_pool), | ||
| 394 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 421 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
| 395 | quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 422 | quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
| 396 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 423 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
| 397 | texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool), | 424 | texture_cache_runtime{device, scheduler, memory_manager, staging_pool, blit_image}, |
| 425 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | ||
| 398 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, | 426 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, |
| 399 | descriptor_pool, update_descriptor_queue, renderpass_cache), | 427 | descriptor_pool, update_descriptor_queue), |
| 400 | buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, staging_pool), | 428 | buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, stream_buffer, |
| 401 | sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler), | 429 | staging_pool), |
| 430 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, | ||
| 402 | fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, | 431 | fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, |
| 403 | scheduler), | 432 | scheduler), |
| 404 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { | 433 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { |
| @@ -427,9 +456,10 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 427 | const DrawParameters draw_params = | 456 | const DrawParameters draw_params = |
| 428 | SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); | 457 | SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); |
| 429 | 458 | ||
| 430 | update_descriptor_queue.Acquire(); | 459 | auto lock = texture_cache.AcquireLock(); |
| 431 | sampled_views.clear(); | 460 | texture_cache.SynchronizeGraphicsDescriptors(); |
| 432 | image_views.clear(); | 461 | |
| 462 | texture_cache.UpdateRenderTargets(false); | ||
| 433 | 463 | ||
| 434 | const auto shaders = pipeline_cache.GetShaders(); | 464 | const auto shaders = pipeline_cache.GetShaders(); |
| 435 | key.shaders = GetShaderAddresses(shaders); | 465 | key.shaders = GetShaderAddresses(shaders); |
| @@ -437,30 +467,24 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 437 | 467 | ||
| 438 | buffer_cache.Unmap(); | 468 | buffer_cache.Unmap(); |
| 439 | 469 | ||
| 440 | const Texceptions texceptions = UpdateAttachments(false); | 470 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); |
| 441 | SetupImageTransitions(texceptions, color_attachments, zeta_attachment); | 471 | key.renderpass = framebuffer->RenderPass(); |
| 442 | |||
| 443 | key.renderpass_params = GetRenderPassParams(texceptions); | ||
| 444 | key.padding = 0; | ||
| 445 | 472 | ||
| 446 | auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); | 473 | auto* const pipeline = |
| 474 | pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders); | ||
| 447 | if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { | 475 | if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { |
| 448 | // Async graphics pipeline was not ready. | 476 | // Async graphics pipeline was not ready. |
| 449 | return; | 477 | return; |
| 450 | } | 478 | } |
| 451 | 479 | ||
| 452 | scheduler.BindGraphicsPipeline(pipeline->GetHandle()); | ||
| 453 | |||
| 454 | const auto renderpass = pipeline->GetRenderPass(); | ||
| 455 | const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); | ||
| 456 | scheduler.RequestRenderpass(renderpass, framebuffer, render_area); | ||
| 457 | |||
| 458 | UpdateDynamicStates(); | ||
| 459 | |||
| 460 | buffer_bindings.Bind(device, scheduler); | 480 | buffer_bindings.Bind(device, scheduler); |
| 461 | 481 | ||
| 462 | BeginTransformFeedback(); | 482 | BeginTransformFeedback(); |
| 463 | 483 | ||
| 484 | scheduler.RequestRenderpass(framebuffer); | ||
| 485 | scheduler.BindGraphicsPipeline(pipeline->GetHandle()); | ||
| 486 | UpdateDynamicStates(); | ||
| 487 | |||
| 464 | const auto pipeline_layout = pipeline->GetLayout(); | 488 | const auto pipeline_layout = pipeline->GetLayout(); |
| 465 | const auto descriptor_set = pipeline->CommitDescriptorSet(); | 489 | const auto descriptor_set = pipeline->CommitDescriptorSet(); |
| 466 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { | 490 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { |
| @@ -481,9 +505,6 @@ void RasterizerVulkan::Clear() { | |||
| 481 | return; | 505 | return; |
| 482 | } | 506 | } |
| 483 | 507 | ||
| 484 | sampled_views.clear(); | ||
| 485 | image_views.clear(); | ||
| 486 | |||
| 487 | query_cache.UpdateCounters(); | 508 | query_cache.UpdateCounters(); |
| 488 | 509 | ||
| 489 | const auto& regs = maxwell3d.regs; | 510 | const auto& regs = maxwell3d.regs; |
| @@ -495,20 +516,24 @@ void RasterizerVulkan::Clear() { | |||
| 495 | return; | 516 | return; |
| 496 | } | 517 | } |
| 497 | 518 | ||
| 498 | [[maybe_unused]] const auto texceptions = UpdateAttachments(true); | 519 | auto lock = texture_cache.AcquireLock(); |
| 499 | DEBUG_ASSERT(texceptions.none()); | 520 | texture_cache.UpdateRenderTargets(true); |
| 500 | SetupImageTransitions(0, color_attachments, zeta_attachment); | 521 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); |
| 501 | 522 | const VkExtent2D render_area = framebuffer->RenderArea(); | |
| 502 | const VkRenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); | 523 | scheduler.RequestRenderpass(framebuffer); |
| 503 | const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); | ||
| 504 | scheduler.RequestRenderpass(renderpass, framebuffer, render_area); | ||
| 505 | 524 | ||
| 506 | VkClearRect clear_rect; | 525 | VkClearRect clear_rect{ |
| 507 | clear_rect.baseArrayLayer = regs.clear_buffers.layer; | 526 | .rect = GetScissorState(regs, 0), |
| 508 | clear_rect.layerCount = 1; | 527 | .baseArrayLayer = regs.clear_buffers.layer, |
| 509 | clear_rect.rect = GetScissorState(regs, 0); | 528 | .layerCount = 1, |
| 510 | clear_rect.rect.extent.width = std::min(clear_rect.rect.extent.width, render_area.width); | 529 | }; |
| 511 | clear_rect.rect.extent.height = std::min(clear_rect.rect.extent.height, render_area.height); | 530 | if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) { |
| 531 | return; | ||
| 532 | } | ||
| 533 | clear_rect.rect.extent = VkExtent2D{ | ||
| 534 | .width = std::min(clear_rect.rect.extent.width, render_area.width), | ||
| 535 | .height = std::min(clear_rect.rect.extent.height, render_area.height), | ||
| 536 | }; | ||
| 512 | 537 | ||
| 513 | if (use_color) { | 538 | if (use_color) { |
| 514 | VkClearValue clear_value; | 539 | VkClearValue clear_value; |
| @@ -549,9 +574,6 @@ void RasterizerVulkan::Clear() { | |||
| 549 | 574 | ||
| 550 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | 575 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { |
| 551 | MICROPROFILE_SCOPE(Vulkan_Compute); | 576 | MICROPROFILE_SCOPE(Vulkan_Compute); |
| 552 | update_descriptor_queue.Acquire(); | ||
| 553 | sampled_views.clear(); | ||
| 554 | image_views.clear(); | ||
| 555 | 577 | ||
| 556 | query_cache.UpdateCounters(); | 578 | query_cache.UpdateCounters(); |
| 557 | 579 | ||
| @@ -570,30 +592,46 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 570 | // Compute dispatches can't be executed inside a renderpass | 592 | // Compute dispatches can't be executed inside a renderpass |
| 571 | scheduler.RequestOutsideRenderPassOperationContext(); | 593 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 572 | 594 | ||
| 573 | buffer_cache.Map(CalculateComputeStreamBufferSize()); | 595 | image_view_indices.clear(); |
| 596 | sampler_handles.clear(); | ||
| 597 | |||
| 598 | auto lock = texture_cache.AcquireLock(); | ||
| 599 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 574 | 600 | ||
| 575 | const auto& entries = pipeline.GetEntries(); | 601 | const auto& entries = pipeline.GetEntries(); |
| 576 | SetupComputeConstBuffers(entries); | ||
| 577 | SetupComputeGlobalBuffers(entries); | ||
| 578 | SetupComputeUniformTexels(entries); | 602 | SetupComputeUniformTexels(entries); |
| 579 | SetupComputeTextures(entries); | 603 | SetupComputeTextures(entries); |
| 580 | SetupComputeStorageTexels(entries); | 604 | SetupComputeStorageTexels(entries); |
| 581 | SetupComputeImages(entries); | 605 | SetupComputeImages(entries); |
| 582 | 606 | ||
| 583 | buffer_cache.Unmap(); | 607 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |
| 608 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 584 | 609 | ||
| 585 | TransitionImages(sampled_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 610 | buffer_cache.Map(CalculateComputeStreamBufferSize()); |
| 586 | VK_ACCESS_SHADER_READ_BIT); | ||
| 587 | TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 588 | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); | ||
| 589 | 611 | ||
| 612 | update_descriptor_queue.Acquire(); | ||
| 613 | |||
| 614 | SetupComputeConstBuffers(entries); | ||
| 615 | SetupComputeGlobalBuffers(entries); | ||
| 616 | |||
| 617 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | ||
| 618 | VkSampler* sampler_ptr = sampler_handles.data(); | ||
| 619 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, | ||
| 620 | sampler_ptr); | ||
| 621 | |||
| 622 | buffer_cache.Unmap(); | ||
| 623 | |||
| 624 | const VkPipeline pipeline_handle = pipeline.GetHandle(); | ||
| 625 | const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); | ||
| 626 | const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); | ||
| 590 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, | 627 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, |
| 591 | grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), | 628 | grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, |
| 592 | layout = pipeline.GetLayout(), | 629 | descriptor_set](vk::CommandBuffer cmdbuf) { |
| 593 | descriptor_set = pipeline.CommitDescriptorSet()](vk::CommandBuffer cmdbuf) { | ||
| 594 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); | 630 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); |
| 595 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, DESCRIPTOR_SET, | 631 | if (descriptor_set) { |
| 596 | descriptor_set, {}); | 632 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, |
| 633 | DESCRIPTOR_SET, descriptor_set, nullptr); | ||
| 634 | } | ||
| 597 | cmdbuf.Dispatch(grid_x, grid_y, grid_z); | 635 | cmdbuf.Dispatch(grid_x, grid_y, grid_z); |
| 598 | }); | 636 | }); |
| 599 | } | 637 | } |
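The reworked DispatchCompute resolves pipeline.GetHandle(), GetLayout(), and CommitDescriptorSet() into locals before recording, and the lambda now skips BindDescriptorSets when the pipeline has no descriptor set. A minimal sketch of the capture-by-value idea, with stand-in types (Pipeline, the queue, and Record here are illustrative, not the project's real interfaces): the deferred command runs later on the scheduler thread, so every handle it needs is resolved up front and captured by value.

    #include <functional>
    #include <vector>

    struct Pipeline {
        int handle = 42;
        int Handle() const { return handle; }
    };

    std::vector<std::function<void()>> deferred;  // stand-in for the command scheduler

    void Record(const Pipeline& pipeline) {
        const int pipeline_handle = pipeline.Handle();  // resolved now, on the submit thread
        deferred.push_back([pipeline_handle] {
            // replayed later; the Pipeline object itself is never touched here
            (void)pipeline_handle;
        });
    }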
| @@ -613,7 +651,10 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | |||
| 613 | if (addr == 0 || size == 0) { | 651 | if (addr == 0 || size == 0) { |
| 614 | return; | 652 | return; |
| 615 | } | 653 | } |
| 616 | texture_cache.FlushRegion(addr, size); | 654 | { |
| 655 | auto lock = texture_cache.AcquireLock(); | ||
| 656 | texture_cache.DownloadMemory(addr, size); | ||
| 657 | } | ||
| 617 | buffer_cache.FlushRegion(addr, size); | 658 | buffer_cache.FlushRegion(addr, size); |
| 618 | query_cache.FlushRegion(addr, size); | 659 | query_cache.FlushRegion(addr, size); |
| 619 | } | 660 | } |
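FlushRegion (and the neighbouring InvalidateRegion/OnCPUWrite hunks) now wraps the texture cache call in a brace scope so the lock returned by AcquireLock is released before the buffer and query caches run. A minimal sketch of that scoping, assuming AcquireLock hands back a movable std::unique_lock; the types below are stand-ins:

    #include <cstdint>
    #include <mutex>

    class TextureCache {
    public:
        std::unique_lock<std::mutex> AcquireLock() { return std::unique_lock{mutex}; }
        void DownloadMemory(uint64_t addr, uint64_t size) { (void)addr; (void)size; }
    private:
        std::mutex mutex;
    };

    void FlushRegion(TextureCache& texture_cache, uint64_t addr, uint64_t size) {
        {
            auto lock = texture_cache.AcquireLock();  // held only for the texture cache work
            texture_cache.DownloadMemory(addr, size);
        }  // lock released here, before the other caches are flushed
    }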
| @@ -622,14 +663,18 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { | |||
| 622 | if (!Settings::IsGPULevelHigh()) { | 663 | if (!Settings::IsGPULevelHigh()) { |
| 623 | return buffer_cache.MustFlushRegion(addr, size); | 664 | return buffer_cache.MustFlushRegion(addr, size); |
| 624 | } | 665 | } |
| 625 | return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); | 666 | return texture_cache.IsRegionGpuModified(addr, size) || |
| 667 | buffer_cache.MustFlushRegion(addr, size); | ||
| 626 | } | 668 | } |
| 627 | 669 | ||
| 628 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | 670 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { |
| 629 | if (addr == 0 || size == 0) { | 671 | if (addr == 0 || size == 0) { |
| 630 | return; | 672 | return; |
| 631 | } | 673 | } |
| 632 | texture_cache.InvalidateRegion(addr, size); | 674 | { |
| 675 | auto lock = texture_cache.AcquireLock(); | ||
| 676 | texture_cache.WriteMemory(addr, size); | ||
| 677 | } | ||
| 633 | pipeline_cache.InvalidateRegion(addr, size); | 678 | pipeline_cache.InvalidateRegion(addr, size); |
| 634 | buffer_cache.InvalidateRegion(addr, size); | 679 | buffer_cache.InvalidateRegion(addr, size); |
| 635 | query_cache.InvalidateRegion(addr, size); | 680 | query_cache.InvalidateRegion(addr, size); |
| @@ -639,17 +684,28 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
| 639 | if (addr == 0 || size == 0) { | 684 | if (addr == 0 || size == 0) { |
| 640 | return; | 685 | return; |
| 641 | } | 686 | } |
| 642 | texture_cache.OnCPUWrite(addr, size); | 687 | { |
| 688 | auto lock = texture_cache.AcquireLock(); | ||
| 689 | texture_cache.WriteMemory(addr, size); | ||
| 690 | } | ||
| 643 | pipeline_cache.OnCPUWrite(addr, size); | 691 | pipeline_cache.OnCPUWrite(addr, size); |
| 644 | buffer_cache.OnCPUWrite(addr, size); | 692 | buffer_cache.OnCPUWrite(addr, size); |
| 645 | } | 693 | } |
| 646 | 694 | ||
| 647 | void RasterizerVulkan::SyncGuestHost() { | 695 | void RasterizerVulkan::SyncGuestHost() { |
| 648 | texture_cache.SyncGuestHost(); | ||
| 649 | buffer_cache.SyncGuestHost(); | 696 | buffer_cache.SyncGuestHost(); |
| 650 | pipeline_cache.SyncGuestHost(); | 697 | pipeline_cache.SyncGuestHost(); |
| 651 | } | 698 | } |
| 652 | 699 | ||
| 700 | void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | ||
| 701 | { | ||
| 702 | auto lock = texture_cache.AcquireLock(); | ||
| 703 | texture_cache.UnmapMemory(addr, size); | ||
| 704 | } | ||
| 705 | buffer_cache.OnCPUWrite(addr, size); | ||
| 706 | pipeline_cache.OnCPUWrite(addr, size); | ||
| 707 | } | ||
| 708 | |||
| 653 | void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { | 709 | void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { |
| 654 | if (!gpu.IsAsync()) { | 710 | if (!gpu.IsAsync()) { |
| 655 | gpu_memory.Write<u32>(addr, value); | 711 | gpu_memory.Write<u32>(addr, value); |
| @@ -700,6 +756,14 @@ void RasterizerVulkan::WaitForIdle() { | |||
| 700 | }); | 756 | }); |
| 701 | } | 757 | } |
| 702 | 758 | ||
| 759 | void RasterizerVulkan::FragmentBarrier() { | ||
| 760 | // We already put barriers when a render pass finishes | ||
| 761 | } | ||
| 762 | |||
| 763 | void RasterizerVulkan::TiledCacheBarrier() { | ||
| 764 | // TODO: Implementing tiled barriers requires rewriting a good chunk of the Vulkan backend | ||
| 765 | } | ||
| 766 | |||
| 703 | void RasterizerVulkan::FlushCommands() { | 767 | void RasterizerVulkan::FlushCommands() { |
| 704 | if (draw_counter > 0) { | 768 | if (draw_counter > 0) { |
| 705 | draw_counter = 0; | 769 | draw_counter = 0; |
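FragmentBarrier is a no-op because the backend already emits barriers when a render pass ends, and TiledCacheBarrier is left as a TODO. For illustration only, a plausible explicit fragment barrier would be a global memory barrier from color attachment writes to fragment shader reads; this is a hedged sketch against the stock Vulkan API, not code from the project:

    #include <vulkan/vulkan.h>

    // Make color attachment writes visible to subsequent fragment shader reads.
    void EmitFragmentBarrier(VkCommandBuffer cmdbuf) {
        const VkMemoryBarrier barrier{
            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
        };
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                             VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 1, &barrier, 0, nullptr,
                             0, nullptr);
    }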
| @@ -710,14 +774,20 @@ void RasterizerVulkan::FlushCommands() { | |||
| 710 | void RasterizerVulkan::TickFrame() { | 774 | void RasterizerVulkan::TickFrame() { |
| 711 | draw_counter = 0; | 775 | draw_counter = 0; |
| 712 | update_descriptor_queue.TickFrame(); | 776 | update_descriptor_queue.TickFrame(); |
| 777 | fence_manager.TickFrame(); | ||
| 713 | buffer_cache.TickFrame(); | 778 | buffer_cache.TickFrame(); |
| 714 | staging_pool.TickFrame(); | 779 | staging_pool.TickFrame(); |
| 780 | { | ||
| 781 | auto lock = texture_cache.AcquireLock(); | ||
| 782 | texture_cache.TickFrame(); | ||
| 783 | } | ||
| 715 | } | 784 | } |
| 716 | 785 | ||
| 717 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 786 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 718 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 787 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 719 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 788 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 720 | texture_cache.DoFermiCopy(src, dst, copy_config); | 789 | auto lock = texture_cache.AcquireLock(); |
| 790 | texture_cache.BlitImage(dst, src, copy_config); | ||
| 721 | return true; | 791 | return true; |
| 722 | } | 792 | } |
| 723 | 793 | ||
| @@ -727,20 +797,16 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 727 | return false; | 797 | return false; |
| 728 | } | 798 | } |
| 729 | 799 | ||
| 730 | const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; | 800 | auto lock = texture_cache.AcquireLock(); |
| 731 | if (!surface) { | 801 | ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); |
| 802 | if (!image_view) { | ||
| 732 | return false; | 803 | return false; |
| 733 | } | 804 | } |
| 734 | 805 | ||
| 735 | // Verify that the cached surface is the same size and format as the requested framebuffer | 806 | screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); |
| 736 | const auto& params{surface->GetSurfaceParams()}; | 807 | screen_info.width = image_view->size.width; |
| 737 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); | 808 | screen_info.height = image_view->size.height; |
| 738 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); | 809 | screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); |
| 739 | |||
| 740 | screen_info.image = &surface->GetImage(); | ||
| 741 | screen_info.width = params.width; | ||
| 742 | screen_info.height = params.height; | ||
| 743 | screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion; | ||
| 744 | return true; | 810 | return true; |
| 745 | } | 811 | } |
| 746 | 812 | ||
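AccelerateDisplay now asks the texture cache for an ImageView by framebuffer address and fills screen_info from it, dropping the old width/height assertions; sRGB is derived from the view's pixel format instead of the surface parameters. A tiny sketch of the lookup-or-bail shape, with a stand-in map in place of the real cache:

    #include <cstdint>
    #include <unordered_map>

    struct ScreenInfo {
        uint32_t width = 0;
        uint32_t height = 0;
        bool is_srgb = false;
    };

    std::unordered_map<uint64_t, ScreenInfo> views;  // stand-in for the cached image views

    // Mirrors the early-return: nullptr means the address is not backed by a view.
    const ScreenInfo* TryFindFramebuffer(uint64_t addr) {
        const auto it = views.find(addr);
        return it == views.end() ? nullptr : &it->second;
    }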
| @@ -765,103 +831,6 @@ void RasterizerVulkan::FlushWork() { | |||
| 765 | draw_counter = 0; | 831 | draw_counter = 0; |
| 766 | } | 832 | } |
| 767 | 833 | ||
| 768 | RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) { | ||
| 769 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 770 | |||
| 771 | const auto& regs = maxwell3d.regs; | ||
| 772 | auto& dirty = maxwell3d.dirty.flags; | ||
| 773 | const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; | ||
| 774 | dirty[VideoCommon::Dirty::RenderTargets] = false; | ||
| 775 | |||
| 776 | texture_cache.GuardRenderTargets(true); | ||
| 777 | |||
| 778 | Texceptions texceptions; | ||
| 779 | for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | ||
| 780 | if (update_rendertargets) { | ||
| 781 | const bool preserve_contents = HasToPreserveColorContents(is_clear, regs); | ||
| 782 | color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents); | ||
| 783 | } | ||
| 784 | if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { | ||
| 785 | texceptions[rt] = true; | ||
| 786 | } | ||
| 787 | } | ||
| 788 | |||
| 789 | if (update_rendertargets) { | ||
| 790 | const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs); | ||
| 791 | zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents); | ||
| 792 | } | ||
| 793 | if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { | ||
| 794 | texceptions[ZETA_TEXCEPTION_INDEX] = true; | ||
| 795 | } | ||
| 796 | |||
| 797 | texture_cache.GuardRenderTargets(false); | ||
| 798 | |||
| 799 | return texceptions; | ||
| 800 | } | ||
| 801 | |||
| 802 | bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) { | ||
| 803 | bool overlap = false; | ||
| 804 | for (auto& [view, layout] : sampled_views) { | ||
| 805 | if (!attachment.IsSameSurface(*view)) { | ||
| 806 | continue; | ||
| 807 | } | ||
| 808 | overlap = true; | ||
| 809 | *layout = VK_IMAGE_LAYOUT_GENERAL; | ||
| 810 | } | ||
| 811 | return overlap; | ||
| 812 | } | ||
| 813 | |||
| 814 | std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( | ||
| 815 | VkRenderPass renderpass) { | ||
| 816 | FramebufferCacheKey key{ | ||
| 817 | .renderpass = renderpass, | ||
| 818 | .width = std::numeric_limits<u32>::max(), | ||
| 819 | .height = std::numeric_limits<u32>::max(), | ||
| 820 | .layers = std::numeric_limits<u32>::max(), | ||
| 821 | .views = {}, | ||
| 822 | }; | ||
| 823 | |||
| 824 | const auto try_push = [&key](const View& view) { | ||
| 825 | if (!view) { | ||
| 826 | return false; | ||
| 827 | } | ||
| 828 | key.views.push_back(view->GetAttachment()); | ||
| 829 | key.width = std::min(key.width, view->GetWidth()); | ||
| 830 | key.height = std::min(key.height, view->GetHeight()); | ||
| 831 | key.layers = std::min(key.layers, view->GetNumLayers()); | ||
| 832 | return true; | ||
| 833 | }; | ||
| 834 | |||
| 835 | const auto& regs = maxwell3d.regs; | ||
| 836 | const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); | ||
| 837 | for (std::size_t index = 0; index < num_attachments; ++index) { | ||
| 838 | if (try_push(color_attachments[index])) { | ||
| 839 | texture_cache.MarkColorBufferInUse(index); | ||
| 840 | } | ||
| 841 | } | ||
| 842 | if (try_push(zeta_attachment)) { | ||
| 843 | texture_cache.MarkDepthBufferInUse(); | ||
| 844 | } | ||
| 845 | |||
| 846 | const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); | ||
| 847 | auto& framebuffer = fbentry->second; | ||
| 848 | if (is_cache_miss) { | ||
| 849 | framebuffer = device.GetLogical().CreateFramebuffer({ | ||
| 850 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, | ||
| 851 | .pNext = nullptr, | ||
| 852 | .flags = 0, | ||
| 853 | .renderPass = key.renderpass, | ||
| 854 | .attachmentCount = static_cast<u32>(key.views.size()), | ||
| 855 | .pAttachments = key.views.data(), | ||
| 856 | .width = key.width, | ||
| 857 | .height = key.height, | ||
| 858 | .layers = key.layers, | ||
| 859 | }); | ||
| 860 | } | ||
| 861 | |||
| 862 | return {*framebuffer, VkExtent2D{key.width, key.height}}; | ||
| 863 | } | ||
| 864 | |||
| 865 | RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, | 834 | RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, |
| 866 | BufferBindings& buffer_bindings, | 835 | BufferBindings& buffer_bindings, |
| 867 | bool is_indexed, | 836 | bool is_indexed, |
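The removed ConfigureFramebuffers leaned on the try_emplace build-on-miss idiom that also appears elsewhere in this backend (the framebuffer_cache member it served, and the render pass cache deleted further below). A generic, compilable restatement of that idiom, kept as a sketch:

    #include <unordered_map>

    // try_emplace default-constructs the value and reports whether the key was new,
    // so the expensive object is built exactly once per key.
    template <typename Key, typename Value, typename Factory>
    Value& GetOrCreate(std::unordered_map<Key, Value>& cache, const Key& key, Factory&& create) {
        const auto [entry, is_cache_miss] = cache.try_emplace(key);
        if (is_cache_miss) {
            entry->second = create(key);
        }
        return entry->second;
    }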
| @@ -885,50 +854,37 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt | |||
| 885 | 854 | ||
| 886 | void RasterizerVulkan::SetupShaderDescriptors( | 855 | void RasterizerVulkan::SetupShaderDescriptors( |
| 887 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { | 856 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { |
| 888 | texture_cache.GuardSamplers(true); | 857 | image_view_indices.clear(); |
| 889 | 858 | sampler_handles.clear(); | |
| 890 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | 859 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |
| 891 | // Skip VertexA stage | ||
| 892 | Shader* const shader = shaders[stage + 1]; | 860 | Shader* const shader = shaders[stage + 1]; |
| 893 | if (!shader) { | 861 | if (!shader) { |
| 894 | continue; | 862 | continue; |
| 895 | } | 863 | } |
| 896 | const auto& entries = shader->GetEntries(); | 864 | const auto& entries = shader->GetEntries(); |
| 897 | SetupGraphicsConstBuffers(entries, stage); | ||
| 898 | SetupGraphicsGlobalBuffers(entries, stage); | ||
| 899 | SetupGraphicsUniformTexels(entries, stage); | 865 | SetupGraphicsUniformTexels(entries, stage); |
| 900 | SetupGraphicsTextures(entries, stage); | 866 | SetupGraphicsTextures(entries, stage); |
| 901 | SetupGraphicsStorageTexels(entries, stage); | 867 | SetupGraphicsStorageTexels(entries, stage); |
| 902 | SetupGraphicsImages(entries, stage); | 868 | SetupGraphicsImages(entries, stage); |
| 903 | } | 869 | } |
| 904 | texture_cache.GuardSamplers(false); | 870 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |
| 905 | } | 871 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); |
| 906 | 872 | ||
| 907 | void RasterizerVulkan::SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color, | 873 | update_descriptor_queue.Acquire(); |
| 908 | const ZetaAttachment& zeta) { | ||
| 909 | TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT); | ||
| 910 | TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, | ||
| 911 | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); | ||
| 912 | 874 | ||
| 913 | for (std::size_t rt = 0; rt < color.size(); ++rt) { | 875 | ImageViewId* image_view_id_ptr = image_view_ids.data(); |
| 914 | const auto color_attachment = color[rt]; | 876 | VkSampler* sampler_ptr = sampler_handles.data(); |
| 915 | if (color_attachment == nullptr) { | 877 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |
| 878 | // Skip VertexA stage | ||
| 879 | Shader* const shader = shaders[stage + 1]; | ||
| 880 | if (!shader) { | ||
| 916 | continue; | 881 | continue; |
| 917 | } | 882 | } |
| 918 | const auto image_layout = | 883 | const auto& entries = shader->GetEntries(); |
| 919 | texceptions[rt] ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; | 884 | SetupGraphicsConstBuffers(entries, stage); |
| 920 | color_attachment->Transition(image_layout, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, | 885 | SetupGraphicsGlobalBuffers(entries, stage); |
| 921 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | 886 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, |
| 922 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); | 887 | sampler_ptr); |
| 923 | } | ||
| 924 | |||
| 925 | if (zeta != nullptr) { | ||
| 926 | const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] | ||
| 927 | ? VK_IMAGE_LAYOUT_GENERAL | ||
| 928 | : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; | ||
| 929 | zeta->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, | ||
| 930 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||
| 931 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); | ||
| 932 | } | 888 | } |
| 933 | } | 889 | } |
| 934 | 890 | ||
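SetupShaderDescriptors is now two passes: the first loop gathers image view indices and sampler handles per stage, FillGraphicsImageViews resolves all the indices in one batch, and the second loop writes const/global buffers and pushes the resolved ids in the same order through advancing pointers. A compilable sketch of the gather-then-consume shape with stand-in types (the real cache resolves GPU descriptors, not the toy mapping below):

    #include <cstddef>
    #include <cstdint>
    #include <span>
    #include <vector>

    using ImageViewId = uint32_t;

    struct TextureCache {
        // Stand-in for FillGraphicsImageViews: resolve all indices in one batch.
        void Fill(std::span<const uint32_t> indices, std::span<ImageViewId> ids) {
            for (std::size_t i = 0; i < indices.size(); ++i) {
                ids[i] = indices[i] * 2;  // placeholder resolution
            }
        }
    };

    int main() {
        std::vector<uint32_t> indices{3, 7, 9};      // pass 1: gathered per stage
        std::vector<ImageViewId> ids(indices.size());
        TextureCache cache;
        cache.Fill(indices, ids);                    // single batched lookup
        const ImageViewId* cursor = ids.data();      // pass 2: consume in gather order
        for (std::size_t i = 0; i < indices.size(); ++i) {
            const ImageViewId id = *cursor++;        // would feed a descriptor write
            (void)id;
        }
    }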
| @@ -1000,7 +956,7 @@ void RasterizerVulkan::EndTransformFeedback() { | |||
| 1000 | void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { | 956 | void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { |
| 1001 | const auto& regs = maxwell3d.regs; | 957 | const auto& regs = maxwell3d.regs; |
| 1002 | 958 | ||
| 1003 | for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | 959 | for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 1004 | const auto& vertex_array = regs.vertex_array[index]; | 960 | const auto& vertex_array = regs.vertex_array[index]; |
| 1005 | if (!vertex_array.IsEnabled()) { | 961 | if (!vertex_array.IsEnabled()) { |
| 1006 | continue; | 962 | continue; |
| @@ -1009,7 +965,7 @@ void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { | |||
| 1009 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | 965 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; |
| 1010 | 966 | ||
| 1011 | ASSERT(end >= start); | 967 | ASSERT(end >= start); |
| 1012 | const std::size_t size = end - start; | 968 | const size_t size = end - start; |
| 1013 | if (size == 0) { | 969 | if (size == 0) { |
| 1014 | buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); | 970 | buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); |
| 1015 | continue; | 971 | continue; |
| @@ -1070,7 +1026,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar | |||
| 1070 | } | 1026 | } |
| 1071 | } | 1027 | } |
| 1072 | 1028 | ||
| 1073 | void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { | 1029 | void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) { |
| 1074 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | 1030 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); |
| 1075 | const auto& shader_stage = maxwell3d.state.shader_stages[stage]; | 1031 | const auto& shader_stage = maxwell3d.state.shader_stages[stage]; |
| 1076 | for (const auto& entry : entries.const_buffers) { | 1032 | for (const auto& entry : entries.const_buffers) { |
| @@ -1078,7 +1034,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s | |||
| 1078 | } | 1034 | } |
| 1079 | } | 1035 | } |
| 1080 | 1036 | ||
| 1081 | void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { | 1037 | void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) { |
| 1082 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | 1038 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); |
| 1083 | const auto& cbufs{maxwell3d.state.shader_stages[stage]}; | 1039 | const auto& cbufs{maxwell3d.state.shader_stages[stage]}; |
| 1084 | 1040 | ||
| @@ -1088,37 +1044,49 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, | |||
| 1088 | } | 1044 | } |
| 1089 | } | 1045 | } |
| 1090 | 1046 | ||
| 1091 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { | 1047 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { |
| 1092 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1048 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 1049 | const auto& regs = maxwell3d.regs; | ||
| 1050 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 1093 | for (const auto& entry : entries.uniform_texels) { | 1051 | for (const auto& entry : entries.uniform_texels) { |
| 1094 | const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; | 1052 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); |
| 1095 | SetupUniformTexels(image, entry); | 1053 | image_view_indices.push_back(handle.image); |
| 1096 | } | 1054 | } |
| 1097 | } | 1055 | } |
| 1098 | 1056 | ||
| 1099 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { | 1057 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { |
| 1100 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1058 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 1059 | const auto& regs = maxwell3d.regs; | ||
| 1060 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 1101 | for (const auto& entry : entries.samplers) { | 1061 | for (const auto& entry : entries.samplers) { |
| 1102 | for (std::size_t i = 0; i < entry.size; ++i) { | 1062 | for (size_t index = 0; index < entry.size; ++index) { |
| 1103 | const auto texture = GetTextureInfo(maxwell3d, entry, stage, i); | 1063 | const TextureHandle handle = |
| 1104 | SetupTexture(texture, entry); | 1064 | GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); |
| 1065 | image_view_indices.push_back(handle.image); | ||
| 1066 | |||
| 1067 | Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); | ||
| 1068 | sampler_handles.push_back(sampler->Handle()); | ||
| 1105 | } | 1069 | } |
| 1106 | } | 1070 | } |
| 1107 | } | 1071 | } |
| 1108 | 1072 | ||
| 1109 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { | 1073 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { |
| 1110 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1074 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 1075 | const auto& regs = maxwell3d.regs; | ||
| 1076 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 1111 | for (const auto& entry : entries.storage_texels) { | 1077 | for (const auto& entry : entries.storage_texels) { |
| 1112 | const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; | 1078 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); |
| 1113 | SetupStorageTexel(image, entry); | 1079 | image_view_indices.push_back(handle.image); |
| 1114 | } | 1080 | } |
| 1115 | } | 1081 | } |
| 1116 | 1082 | ||
| 1117 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { | 1083 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { |
| 1118 | MICROPROFILE_SCOPE(Vulkan_Images); | 1084 | MICROPROFILE_SCOPE(Vulkan_Images); |
| 1085 | const auto& regs = maxwell3d.regs; | ||
| 1086 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 1119 | for (const auto& entry : entries.images) { | 1087 | for (const auto& entry : entries.images) { |
| 1120 | const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic; | 1088 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); |
| 1121 | SetupImage(tic, entry); | 1089 | image_view_indices.push_back(handle.image); |
| 1122 | } | 1090 | } |
| 1123 | } | 1091 | } |
| 1124 | 1092 | ||
| @@ -1128,11 +1096,12 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { | |||
| 1128 | for (const auto& entry : entries.const_buffers) { | 1096 | for (const auto& entry : entries.const_buffers) { |
| 1129 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | 1097 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; |
| 1130 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | 1098 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); |
| 1131 | Tegra::Engines::ConstBufferInfo buffer; | 1099 | const Tegra::Engines::ConstBufferInfo info{ |
| 1132 | buffer.address = config.Address(); | 1100 | .address = config.Address(), |
| 1133 | buffer.size = config.size; | 1101 | .size = config.size, |
| 1134 | buffer.enabled = mask[entry.GetIndex()]; | 1102 | .enabled = mask[entry.GetIndex()], |
| 1135 | SetupConstBuffer(entry, buffer); | 1103 | }; |
| 1104 | SetupConstBuffer(entry, info); | ||
| 1136 | } | 1105 | } |
| 1137 | } | 1106 | } |
| 1138 | 1107 | ||
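The const buffer info is now built with C++20 designated initializers instead of default-constructing and assigning field by field, which also lets the object be const. A self-contained sketch; the struct is a stand-in mirroring the fields named in the diff, and the values are illustrative:

    #include <bitset>
    #include <cstdint>

    struct ConstBufferInfo {
        uint64_t address;
        uint32_t size;
        bool enabled;
    };

    int main() {
        const std::bitset<8> mask{0b0000'0101};
        // One step, const from birth, replacing the mutate-after-construct sequence.
        const ConstBufferInfo info{
            .address = 0x1000,
            .size = 256,
            .enabled = mask[0],
        };
        return info.enabled ? 0 : 1;
    }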
| @@ -1147,35 +1116,46 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { | |||
| 1147 | 1116 | ||
| 1148 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { | 1117 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { |
| 1149 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1118 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 1119 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 1150 | for (const auto& entry : entries.uniform_texels) { | 1120 | for (const auto& entry : entries.uniform_texels) { |
| 1151 | const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; | 1121 | const TextureHandle handle = |
| 1152 | SetupUniformTexels(image, entry); | 1122 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); |
| 1123 | image_view_indices.push_back(handle.image); | ||
| 1153 | } | 1124 | } |
| 1154 | } | 1125 | } |
| 1155 | 1126 | ||
| 1156 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | 1127 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { |
| 1157 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1128 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 1129 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 1158 | for (const auto& entry : entries.samplers) { | 1130 | for (const auto& entry : entries.samplers) { |
| 1159 | for (std::size_t i = 0; i < entry.size; ++i) { | 1131 | for (size_t index = 0; index < entry.size; ++index) { |
| 1160 | const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i); | 1132 | const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, |
| 1161 | SetupTexture(texture, entry); | 1133 | COMPUTE_SHADER_INDEX, index); |
| 1134 | image_view_indices.push_back(handle.image); | ||
| 1135 | |||
| 1136 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | ||
| 1137 | sampler_handles.push_back(sampler->Handle()); | ||
| 1162 | } | 1138 | } |
| 1163 | } | 1139 | } |
| 1164 | } | 1140 | } |
| 1165 | 1141 | ||
| 1166 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | 1142 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { |
| 1167 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1143 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 1144 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 1168 | for (const auto& entry : entries.storage_texels) { | 1145 | for (const auto& entry : entries.storage_texels) { |
| 1169 | const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; | 1146 | const TextureHandle handle = |
| 1170 | SetupStorageTexel(image, entry); | 1147 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); |
| 1148 | image_view_indices.push_back(handle.image); | ||
| 1171 | } | 1149 | } |
| 1172 | } | 1150 | } |
| 1173 | 1151 | ||
| 1174 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | 1152 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { |
| 1175 | MICROPROFILE_SCOPE(Vulkan_Images); | 1153 | MICROPROFILE_SCOPE(Vulkan_Images); |
| 1154 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 1176 | for (const auto& entry : entries.images) { | 1155 | for (const auto& entry : entries.images) { |
| 1177 | const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; | 1156 | const TextureHandle handle = |
| 1178 | SetupImage(tic, entry); | 1157 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); |
| 1158 | image_view_indices.push_back(handle.image); | ||
| 1179 | } | 1159 | } |
| 1180 | } | 1160 | } |
| 1181 | 1161 | ||
| @@ -1186,14 +1166,12 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, | |||
| 1186 | update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); | 1166 | update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); |
| 1187 | return; | 1167 | return; |
| 1188 | } | 1168 | } |
| 1189 | |||
| 1190 | // Align the size to avoid bad std140 interactions | 1169 | // Align the size to avoid bad std140 interactions |
| 1191 | const std::size_t size = | 1170 | const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); |
| 1192 | Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); | ||
| 1193 | ASSERT(size <= MaxConstbufferSize); | 1171 | ASSERT(size <= MaxConstbufferSize); |
| 1194 | 1172 | ||
| 1195 | const auto info = | 1173 | const u64 alignment = device.GetUniformBufferAlignment(); |
| 1196 | buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); | 1174 | const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment); |
| 1197 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); | 1175 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); |
| 1198 | } | 1176 | } |
| 1199 | 1177 | ||
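SetupConstBuffer keeps padding the uploaded size to a multiple of sizeof(vec4) so std140 uniform blocks never read past the mapped range. The alignment helper's behaviour, restated as a compile-time sketch:

    #include <cstddef>

    constexpr std::size_t AlignUp(std::size_t value, std::size_t align) {
        return (value + align - 1) / align * align;
    }

    static_assert(AlignUp(13, 4 * sizeof(float)) == 16);  // partial vec4 padded up
    static_assert(AlignUp(16, 4 * sizeof(float)) == 16);  // already aligned
    static_assert(AlignUp(17, 4 * sizeof(float)) == 32);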
| @@ -1206,7 +1184,7 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd | |||
| 1206 | // because Vulkan doesn't like empty buffers. | 1184 | // because Vulkan doesn't like empty buffers. |
| 1207 | // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the | 1185 | // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the |
| 1208 | // default buffer. | 1186 | // default buffer. |
| 1209 | static constexpr std::size_t dummy_size = 4; | 1187 | static constexpr size_t dummy_size = 4; |
| 1210 | const auto info = buffer_cache.GetEmptyBuffer(dummy_size); | 1188 | const auto info = buffer_cache.GetEmptyBuffer(dummy_size); |
| 1211 | update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); | 1189 | update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); |
| 1212 | return; | 1190 | return; |
| @@ -1217,55 +1195,6 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd | |||
| 1217 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); | 1195 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); |
| 1218 | } | 1196 | } |
| 1219 | 1197 | ||
| 1220 | void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, | ||
| 1221 | const UniformTexelEntry& entry) { | ||
| 1222 | const auto view = texture_cache.GetTextureSurface(tic, entry); | ||
| 1223 | ASSERT(view->IsBufferView()); | ||
| 1224 | |||
| 1225 | update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); | ||
| 1226 | } | ||
| 1227 | |||
| 1228 | void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture, | ||
| 1229 | const SamplerEntry& entry) { | ||
| 1230 | auto view = texture_cache.GetTextureSurface(texture.tic, entry); | ||
| 1231 | ASSERT(!view->IsBufferView()); | ||
| 1232 | |||
| 1233 | const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source, | ||
| 1234 | texture.tic.z_source, texture.tic.w_source); | ||
| 1235 | const auto sampler = sampler_cache.GetSampler(texture.tsc); | ||
| 1236 | update_descriptor_queue.AddSampledImage(sampler, image_view); | ||
| 1237 | |||
| 1238 | VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); | ||
| 1239 | *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; | ||
| 1240 | sampled_views.push_back(ImageView{std::move(view), image_layout}); | ||
| 1241 | } | ||
| 1242 | |||
| 1243 | void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic, | ||
| 1244 | const StorageTexelEntry& entry) { | ||
| 1245 | const auto view = texture_cache.GetImageSurface(tic, entry); | ||
| 1246 | ASSERT(view->IsBufferView()); | ||
| 1247 | |||
| 1248 | update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { | ||
| 1252 | auto view = texture_cache.GetImageSurface(tic, entry); | ||
| 1253 | |||
| 1254 | if (entry.is_written) { | ||
| 1255 | view->MarkAsModified(texture_cache.Tick()); | ||
| 1256 | } | ||
| 1257 | |||
| 1258 | UNIMPLEMENTED_IF(tic.IsBuffer()); | ||
| 1259 | |||
| 1260 | const VkImageView image_view = | ||
| 1261 | view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | ||
| 1262 | update_descriptor_queue.AddImage(image_view); | ||
| 1263 | |||
| 1264 | VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); | ||
| 1265 | *image_layout = VK_IMAGE_LAYOUT_GENERAL; | ||
| 1266 | image_views.push_back(ImageView{std::move(view), image_layout}); | ||
| 1267 | } | ||
| 1268 | |||
| 1269 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | 1198 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 1270 | if (!state_tracker.TouchViewports()) { | 1199 | if (!state_tracker.TouchViewports()) { |
| 1271 | return; | 1200 | return; |
| @@ -1457,8 +1386,8 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& | |||
| 1457 | }); | 1386 | }); |
| 1458 | } | 1387 | } |
| 1459 | 1388 | ||
| 1460 | std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { | 1389 | size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { |
| 1461 | std::size_t size = CalculateVertexArraysSize(); | 1390 | size_t size = CalculateVertexArraysSize(); |
| 1462 | if (is_indexed) { | 1391 | if (is_indexed) { |
| 1463 | size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); | 1392 | size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); |
| 1464 | } | 1393 | } |
| @@ -1466,15 +1395,15 @@ std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) | |||
| 1466 | return size; | 1395 | return size; |
| 1467 | } | 1396 | } |
| 1468 | 1397 | ||
| 1469 | std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { | 1398 | size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { |
| 1470 | return Tegra::Engines::KeplerCompute::NumConstBuffers * | 1399 | return Tegra::Engines::KeplerCompute::NumConstBuffers * |
| 1471 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | 1400 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); |
| 1472 | } | 1401 | } |
| 1473 | 1402 | ||
| 1474 | std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { | 1403 | size_t RasterizerVulkan::CalculateVertexArraysSize() const { |
| 1475 | const auto& regs = maxwell3d.regs; | 1404 | const auto& regs = maxwell3d.regs; |
| 1476 | 1405 | ||
| 1477 | std::size_t size = 0; | 1406 | size_t size = 0; |
| 1478 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 1407 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 1479 | // This implementation assumes that all attributes are used in the shader. | 1408 | // This implementation assumes that all attributes are used in the shader. |
| 1480 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; | 1409 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; |
| @@ -1486,12 +1415,12 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { | |||
| 1486 | return size; | 1415 | return size; |
| 1487 | } | 1416 | } |
| 1488 | 1417 | ||
| 1489 | std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { | 1418 | size_t RasterizerVulkan::CalculateIndexBufferSize() const { |
| 1490 | return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * | 1419 | return static_cast<size_t>(maxwell3d.regs.index_array.count) * |
| 1491 | static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); | 1420 | static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); |
| 1492 | } | 1421 | } |
| 1493 | 1422 | ||
| 1494 | std::size_t RasterizerVulkan::CalculateConstBufferSize( | 1423 | size_t RasterizerVulkan::CalculateConstBufferSize( |
| 1495 | const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { | 1424 | const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { |
| 1496 | if (entry.IsIndirect()) { | 1425 | if (entry.IsIndirect()) { |
| 1497 | // Buffer is accessed indirectly, so upload the entire thing | 1426 | // Buffer is accessed indirectly, so upload the entire thing |
| @@ -1502,37 +1431,10 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize( | |||
| 1502 | } | 1431 | } |
| 1503 | } | 1432 | } |
| 1504 | 1433 | ||
| 1505 | RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { | ||
| 1506 | const auto& regs = maxwell3d.regs; | ||
| 1507 | const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); | ||
| 1508 | |||
| 1509 | RenderPassParams params; | ||
| 1510 | params.color_formats = {}; | ||
| 1511 | std::size_t color_texceptions = 0; | ||
| 1512 | |||
| 1513 | std::size_t index = 0; | ||
| 1514 | for (std::size_t rt = 0; rt < num_attachments; ++rt) { | ||
| 1515 | const auto& rendertarget = regs.rt[rt]; | ||
| 1516 | if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) { | ||
| 1517 | continue; | ||
| 1518 | } | ||
| 1519 | params.color_formats[index] = static_cast<u8>(rendertarget.format); | ||
| 1520 | color_texceptions |= (texceptions[rt] ? 1ULL : 0ULL) << index; | ||
| 1521 | ++index; | ||
| 1522 | } | ||
| 1523 | params.num_color_attachments = static_cast<u8>(index); | ||
| 1524 | params.texceptions = static_cast<u8>(color_texceptions); | ||
| 1525 | |||
| 1526 | params.zeta_format = regs.zeta_enable ? static_cast<u8>(regs.zeta.format) : 0; | ||
| 1527 | params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX]; | ||
| 1528 | return params; | ||
| 1529 | } | ||
| 1530 | |||
| 1531 | VkBuffer RasterizerVulkan::DefaultBuffer() { | 1434 | VkBuffer RasterizerVulkan::DefaultBuffer() { |
| 1532 | if (default_buffer) { | 1435 | if (default_buffer) { |
| 1533 | return *default_buffer; | 1436 | return *default_buffer; |
| 1534 | } | 1437 | } |
| 1535 | |||
| 1536 | default_buffer = device.GetLogical().CreateBuffer({ | 1438 | default_buffer = device.GetLogical().CreateBuffer({ |
| 1537 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 1439 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 1538 | .pNext = nullptr, | 1440 | .pNext = nullptr, |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 30ec58eb4..4695718e9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -11,11 +11,11 @@ | |||
| 11 | #include <vector> | 11 | #include <vector> |
| 12 | 12 | ||
| 13 | #include <boost/container/static_vector.hpp> | 13 | #include <boost/container/static_vector.hpp> |
| 14 | #include <boost/functional/hash.hpp> | ||
| 15 | 14 | ||
| 16 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 17 | #include "video_core/rasterizer_accelerated.h" | 16 | #include "video_core/rasterizer_accelerated.h" |
| 18 | #include "video_core/rasterizer_interface.h" | 17 | #include "video_core/rasterizer_interface.h" |
| 18 | #include "video_core/renderer_vulkan/blit_image.h" | ||
| 19 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 19 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 20 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 20 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 21 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 21 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| @@ -24,14 +24,13 @@ | |||
| 24 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 24 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 25 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 25 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 26 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 27 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | ||
| 29 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 27 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 30 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 28 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 29 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||
| 31 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 30 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 32 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 31 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 33 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 34 | #include "video_core/shader/async_shaders.h" | 32 | #include "video_core/shader/async_shaders.h" |
| 33 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 35 | 34 | ||
| 36 | namespace Core { | 35 | namespace Core { |
| 37 | class System; | 36 | class System; |
| @@ -49,65 +48,14 @@ namespace Vulkan { | |||
| 49 | 48 | ||
| 50 | struct VKScreenInfo; | 49 | struct VKScreenInfo; |
| 51 | 50 | ||
| 52 | using ImageViewsPack = boost::container::static_vector<VkImageView, Maxwell::NumRenderTargets + 1>; | ||
| 53 | |||
| 54 | struct FramebufferCacheKey { | ||
| 55 | VkRenderPass renderpass{}; | ||
| 56 | u32 width = 0; | ||
| 57 | u32 height = 0; | ||
| 58 | u32 layers = 0; | ||
| 59 | ImageViewsPack views; | ||
| 60 | |||
| 61 | std::size_t Hash() const noexcept { | ||
| 62 | std::size_t hash = 0; | ||
| 63 | boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass)); | ||
| 64 | for (const auto& view : views) { | ||
| 65 | boost::hash_combine(hash, static_cast<VkImageView>(view)); | ||
| 66 | } | ||
| 67 | boost::hash_combine(hash, width); | ||
| 68 | boost::hash_combine(hash, height); | ||
| 69 | boost::hash_combine(hash, layers); | ||
| 70 | return hash; | ||
| 71 | } | ||
| 72 | |||
| 73 | bool operator==(const FramebufferCacheKey& rhs) const noexcept { | ||
| 74 | return std::tie(renderpass, views, width, height, layers) == | ||
| 75 | std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers); | ||
| 76 | } | ||
| 77 | |||
| 78 | bool operator!=(const FramebufferCacheKey& rhs) const noexcept { | ||
| 79 | return !operator==(rhs); | ||
| 80 | } | ||
| 81 | }; | ||
| 82 | |||
| 83 | } // namespace Vulkan | ||
| 84 | |||
| 85 | namespace std { | ||
| 86 | |||
| 87 | template <> | ||
| 88 | struct hash<Vulkan::FramebufferCacheKey> { | ||
| 89 | std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept { | ||
| 90 | return k.Hash(); | ||
| 91 | } | ||
| 92 | }; | ||
| 93 | |||
| 94 | } // namespace std | ||
| 95 | |||
| 96 | namespace Vulkan { | ||
| 97 | |||
| 98 | class StateTracker; | 51 | class StateTracker; |
| 99 | class BufferBindings; | 52 | class BufferBindings; |
| 100 | 53 | ||
| 101 | struct ImageView { | ||
| 102 | View view; | ||
| 103 | VkImageLayout* layout = nullptr; | ||
| 104 | }; | ||
| 105 | |||
| 106 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { | 54 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { |
| 107 | public: | 55 | public: |
| 108 | explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 56 | explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 109 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 57 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, |
| 110 | VKScreenInfo& screen_info_, const VKDevice& device_, | 58 | VKScreenInfo& screen_info_, const Device& device_, |
| 111 | VKMemoryManager& memory_manager_, StateTracker& state_tracker_, | 59 | VKMemoryManager& memory_manager_, StateTracker& state_tracker_, |
| 112 | VKScheduler& scheduler_); | 60 | VKScheduler& scheduler_); |
| 113 | ~RasterizerVulkan() override; | 61 | ~RasterizerVulkan() override; |
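The deleted FramebufferCacheKey paired a Hash() member with a std::hash specialization so the type could key the unordered framebuffer cache. A generic, compilable restatement of that idiom with a stand-in key type:

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <unordered_map>

    struct Key {
        uint32_t width = 0;
        uint32_t height = 0;

        std::size_t Hash() const noexcept {
            return std::hash<uint64_t>{}((static_cast<uint64_t>(width) << 32) | height);
        }
        bool operator==(const Key& rhs) const noexcept {
            return width == rhs.width && height == rhs.height;
        }
    };

    template <>
    struct std::hash<Key> {
        std::size_t operator()(const Key& k) const noexcept {
            return k.Hash();
        }
    };

    std::unordered_map<Key, int> cache;  // compiles with the default hasher argument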
| @@ -123,15 +71,18 @@ public: | |||
| 123 | void InvalidateRegion(VAddr addr, u64 size) override; | 71 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 124 | void OnCPUWrite(VAddr addr, u64 size) override; | 72 | void OnCPUWrite(VAddr addr, u64 size) override; |
| 125 | void SyncGuestHost() override; | 73 | void SyncGuestHost() override; |
| 74 | void UnmapMemory(VAddr addr, u64 size) override; | ||
| 126 | void SignalSemaphore(GPUVAddr addr, u32 value) override; | 75 | void SignalSemaphore(GPUVAddr addr, u32 value) override; |
| 127 | void SignalSyncPoint(u32 value) override; | 76 | void SignalSyncPoint(u32 value) override; |
| 128 | void ReleaseFences() override; | 77 | void ReleaseFences() override; |
| 129 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 78 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 130 | void WaitForIdle() override; | 79 | void WaitForIdle() override; |
| 80 | void FragmentBarrier() override; | ||
| 81 | void TiledCacheBarrier() override; | ||
| 131 | void FlushCommands() override; | 82 | void FlushCommands() override; |
| 132 | void TickFrame() override; | 83 | void TickFrame() override; |
| 133 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 84 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 134 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 85 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 135 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 86 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| 136 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 87 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 137 | u32 pixel_stride) override; | 88 | u32 pixel_stride) override; |
| @@ -145,11 +96,17 @@ public: | |||
| 145 | } | 96 | } |
| 146 | 97 | ||
| 147 | /// Maximum supported size that a constbuffer can have in bytes. | 98 | /// Maximum supported size that a constbuffer can have in bytes. |
| 148 | static constexpr std::size_t MaxConstbufferSize = 0x10000; | 99 | static constexpr size_t MaxConstbufferSize = 0x10000; |
| 149 | static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, | 100 | static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, |
| 150 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | 101 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); |
| 151 | 102 | ||
| 152 | private: | 103 | private: |
| 104 | static constexpr size_t MAX_TEXTURES = 192; | ||
| 105 | static constexpr size_t MAX_IMAGES = 48; | ||
| 106 | static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; | ||
| 107 | |||
| 108 | static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); | ||
| 109 | |||
| 153 | struct DrawParameters { | 110 | struct DrawParameters { |
| 154 | void Draw(vk::CommandBuffer cmdbuf) const; | 111 | void Draw(vk::CommandBuffer cmdbuf) const; |
| 155 | 112 | ||
| @@ -160,23 +117,8 @@ private: | |||
| 160 | bool is_indexed = 0; | 117 | bool is_indexed = 0; |
| 161 | }; | 118 | }; |
| 162 | 119 | ||
| 163 | using ColorAttachments = std::array<View, Maxwell::NumRenderTargets>; | ||
| 164 | using ZetaAttachment = View; | ||
| 165 | |||
| 166 | using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>; | ||
| 167 | |||
| 168 | static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8; | ||
| 169 | static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); | ||
| 170 | |||
| 171 | void FlushWork(); | 120 | void FlushWork(); |
| 172 | 121 | ||
| 173 | /// @brief Updates the currently bound attachments | ||
| 174 | /// @param is_clear True when the framebuffer is updated as a clear | ||
| 175 | /// @return Bitfield of attachments being used as sampled textures | ||
| 176 | Texceptions UpdateAttachments(bool is_clear); | ||
| 177 | |||
| 178 | std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); | ||
| 179 | |||
| 180 | /// Setups geometry buffers and state. | 122 | /// Setups geometry buffers and state. |
| 181 | DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, | 123 | DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, |
| 182 | bool is_indexed, bool is_instanced); | 124 | bool is_indexed, bool is_instanced); |
| @@ -184,17 +126,12 @@ private: | |||
| 184 | /// Setup descriptors in the graphics pipeline. | 126 | /// Setup descriptors in the graphics pipeline. |
| 185 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); | 127 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); |
| 186 | 128 | ||
| 187 | void SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color, | ||
| 188 | const ZetaAttachment& zeta); | ||
| 189 | |||
| 190 | void UpdateDynamicStates(); | 129 | void UpdateDynamicStates(); |
| 191 | 130 | ||
| 192 | void BeginTransformFeedback(); | 131 | void BeginTransformFeedback(); |
| 193 | 132 | ||
| 194 | void EndTransformFeedback(); | 133 | void EndTransformFeedback(); |
| 195 | 134 | ||
| 196 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); | ||
| 197 | |||
| 198 | void SetupVertexArrays(BufferBindings& buffer_bindings); | 135 | void SetupVertexArrays(BufferBindings& buffer_bindings); |
| 199 | 136 | ||
| 200 | void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); | 137 | void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); |
| @@ -240,14 +177,6 @@ private: | |||
| 240 | 177 | ||
| 241 | void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); | 178 | void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); |
| 242 | 179 | ||
| 243 | void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry); | ||
| 244 | |||
| 245 | void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); | ||
| 246 | |||
| 247 | void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry); | ||
| 248 | |||
| 249 | void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); | ||
| 250 | |||
| 251 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | 180 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 252 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | 181 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 253 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); | 182 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); |
| @@ -264,18 +193,16 @@ private: | |||
| 264 | void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); | 193 | void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); |
| 265 | void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); | 194 | void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); |
| 266 | 195 | ||
| 267 | std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; | 196 | size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; |
| 268 | |||
| 269 | std::size_t CalculateComputeStreamBufferSize() const; | ||
| 270 | 197 | ||
| 271 | std::size_t CalculateVertexArraysSize() const; | 198 | size_t CalculateComputeStreamBufferSize() const; |
| 272 | 199 | ||
| 273 | std::size_t CalculateIndexBufferSize() const; | 200 | size_t CalculateVertexArraysSize() const; |
| 274 | 201 | ||
| 275 | std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry, | 202 | size_t CalculateIndexBufferSize() const; |
| 276 | const Tegra::Engines::ConstBufferInfo& buffer) const; | ||
| 277 | 203 | ||
| 278 | RenderPassParams GetRenderPassParams(Texceptions texceptions) const; | 204 | size_t CalculateConstBufferSize(const ConstBufferEntry& entry, |
| 205 | const Tegra::Engines::ConstBufferInfo& buffer) const; | ||
| 279 | 206 | ||
| 280 | VkBuffer DefaultBuffer(); | 207 | VkBuffer DefaultBuffer(); |
| 281 | 208 | ||
| @@ -285,23 +212,24 @@ private: | |||
| 285 | Tegra::Engines::KeplerCompute& kepler_compute; | 212 | Tegra::Engines::KeplerCompute& kepler_compute; |
| 286 | 213 | ||
| 287 | VKScreenInfo& screen_info; | 214 | VKScreenInfo& screen_info; |
| 288 | const VKDevice& device; | 215 | const Device& device; |
| 289 | VKMemoryManager& memory_manager; | 216 | VKMemoryManager& memory_manager; |
| 290 | StateTracker& state_tracker; | 217 | StateTracker& state_tracker; |
| 291 | VKScheduler& scheduler; | 218 | VKScheduler& scheduler; |
| 292 | 219 | ||
| 220 | VKStreamBuffer stream_buffer; | ||
| 293 | VKStagingBufferPool staging_pool; | 221 | VKStagingBufferPool staging_pool; |
| 294 | VKDescriptorPool descriptor_pool; | 222 | VKDescriptorPool descriptor_pool; |
| 295 | VKUpdateDescriptorQueue update_descriptor_queue; | 223 | VKUpdateDescriptorQueue update_descriptor_queue; |
| 296 | VKRenderPassCache renderpass_cache; | 224 | BlitImageHelper blit_image; |
| 297 | QuadArrayPass quad_array_pass; | 225 | QuadArrayPass quad_array_pass; |
| 298 | QuadIndexedPass quad_indexed_pass; | 226 | QuadIndexedPass quad_indexed_pass; |
| 299 | Uint8Pass uint8_pass; | 227 | Uint8Pass uint8_pass; |
| 300 | 228 | ||
| 301 | VKTextureCache texture_cache; | 229 | TextureCacheRuntime texture_cache_runtime; |
| 230 | TextureCache texture_cache; | ||
| 302 | VKPipelineCache pipeline_cache; | 231 | VKPipelineCache pipeline_cache; |
| 303 | VKBufferCache buffer_cache; | 232 | VKBufferCache buffer_cache; |
| 304 | VKSamplerCache sampler_cache; | ||
| 305 | VKQueryCache query_cache; | 233 | VKQueryCache query_cache; |
| 306 | VKFenceManager fence_manager; | 234 | VKFenceManager fence_manager; |
| 307 | 235 | ||
| @@ -310,16 +238,11 @@ private: | |||
| 310 | vk::Event wfi_event; | 238 | vk::Event wfi_event; |
| 311 | VideoCommon::Shader::AsyncShaders async_shaders; | 239 | VideoCommon::Shader::AsyncShaders async_shaders; |
| 312 | 240 | ||
| 313 | ColorAttachments color_attachments; | 241 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |
| 314 | ZetaAttachment zeta_attachment; | 242 | std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; |
| 315 | 243 | boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles; | |
| 316 | std::vector<ImageView> sampled_views; | ||
| 317 | std::vector<ImageView> image_views; | ||
| 318 | 244 | ||
| 319 | u32 draw_counter = 0; | 245 | u32 draw_counter = 0; |
| 320 | |||
| 321 | // TODO(Rodrigo): Invalidate on image destruction | ||
| 322 | std::unordered_map<FramebufferCacheKey, vk::Framebuffer> framebuffer_cache; | ||
| 323 | }; | 246 | }; |
| 324 | 247 | ||
| 325 | } // namespace Vulkan | 248 | } // namespace Vulkan |
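The header swaps VKTextureCache for a TextureCacheRuntime plus TextureCache pair, which suggests the shared cache logic is written once and parameterized on a small per-backend runtime that owns the API-facing calls. A hedged sketch of that pattern; the names and interfaces below are illustrative, not the project's real ones:

    #include <cstdint>

    struct VulkanRuntime {
        void CopyImage(uint32_t dst, uint32_t src) {
            // would record vkCmdCopyImage here
            (void)dst;
            (void)src;
        }
    };

    template <typename Runtime>
    class TextureCache {
    public:
        explicit TextureCache(Runtime& runtime_) : runtime{runtime_} {}
        void Copy(uint32_t dst, uint32_t src) {
            runtime.CopyImage(dst, src);  // API-specific work is delegated
        }
    private:
        Runtime& runtime;
    };

    VulkanRuntime runtime;
    TextureCache<VulkanRuntime> cache{runtime};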
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp deleted file mode 100644 index e812c7dd6..000000000 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ /dev/null | |||
| @@ -1,158 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include <memory> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/cityhash.h" | ||
| 10 | #include "video_core/engines/maxwell_3d.h" | ||
| 11 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 14 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 15 | |||
| 16 | namespace Vulkan { | ||
| 17 | |||
| 18 | std::size_t RenderPassParams::Hash() const noexcept { | ||
| 19 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); | ||
| 20 | return static_cast<std::size_t>(hash); | ||
| 21 | } | ||
| 22 | |||
| 23 | bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept { | ||
| 24 | return std::memcmp(&rhs, this, sizeof *this) == 0; | ||
| 25 | } | ||
| 26 | |||
| 27 | VKRenderPassCache::VKRenderPassCache(const VKDevice& device_) : device{device_} {} | ||
| 28 | |||
| 29 | VKRenderPassCache::~VKRenderPassCache() = default; | ||
| 30 | |||
| 31 | VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { | ||
| 32 | const auto [pair, is_cache_miss] = cache.try_emplace(params); | ||
| 33 | auto& entry = pair->second; | ||
| 34 | if (is_cache_miss) { | ||
| 35 | entry = CreateRenderPass(params); | ||
| 36 | } | ||
| 37 | return *entry; | ||
| 38 | } | ||
| 39 | |||
| 40 | vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { | ||
| 41 | using namespace VideoCore::Surface; | ||
| 42 | const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments); | ||
| 43 | |||
| 44 | std::vector<VkAttachmentDescription> descriptors; | ||
| 45 | descriptors.reserve(num_attachments); | ||
| 46 | |||
| 47 | std::vector<VkAttachmentReference> color_references; | ||
| 48 | color_references.reserve(num_attachments); | ||
| 49 | |||
| 50 | for (std::size_t rt = 0; rt < num_attachments; ++rt) { | ||
| 51 | const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]); | ||
| 52 | const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format); | ||
| 53 | const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format); | ||
| 54 | ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", | ||
| 55 | static_cast<int>(pixel_format)); | ||
| 56 | |||
| 57 | // TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed. | ||
| 58 | const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0 | ||
| 59 | ? VK_IMAGE_LAYOUT_GENERAL | ||
| 60 | : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; | ||
| 61 | descriptors.push_back({ | ||
| 62 | .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, | ||
| 63 | .format = format.format, | ||
| 64 | .samples = VK_SAMPLE_COUNT_1_BIT, | ||
| 65 | .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 66 | .storeOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 67 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, | ||
| 68 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, | ||
| 69 | .initialLayout = color_layout, | ||
| 70 | .finalLayout = color_layout, | ||
| 71 | }); | ||
| 72 | |||
| 73 | color_references.push_back({ | ||
| 74 | .attachment = static_cast<u32>(rt), | ||
| 75 | .layout = color_layout, | ||
| 76 | }); | ||
| 77 | } | ||
| 78 | |||
| 79 | VkAttachmentReference zeta_attachment_ref; | ||
| 80 | const bool has_zeta = params.zeta_format != 0; | ||
| 81 | if (has_zeta) { | ||
| 82 | const auto guest_format = static_cast<Tegra::DepthFormat>(params.zeta_format); | ||
| 83 | const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format); | ||
| 84 | const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format); | ||
| 85 | ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", | ||
| 86 | static_cast<int>(pixel_format)); | ||
| 87 | |||
| 88 | const VkImageLayout zeta_layout = params.zeta_texception != 0 | ||
| 89 | ? VK_IMAGE_LAYOUT_GENERAL | ||
| 90 | : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; | ||
| 91 | descriptors.push_back({ | ||
| 92 | .flags = 0, | ||
| 93 | .format = format.format, | ||
| 94 | .samples = VK_SAMPLE_COUNT_1_BIT, | ||
| 95 | .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 96 | .storeOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 97 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 98 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 99 | .initialLayout = zeta_layout, | ||
| 100 | .finalLayout = zeta_layout, | ||
| 101 | }); | ||
| 102 | |||
| 103 | zeta_attachment_ref = { | ||
| 104 | .attachment = static_cast<u32>(num_attachments), | ||
| 105 | .layout = zeta_layout, | ||
| 106 | }; | ||
| 107 | } | ||
| 108 | |||
| 109 | const VkSubpassDescription subpass_description{ | ||
| 110 | .flags = 0, | ||
| 111 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 112 | .inputAttachmentCount = 0, | ||
| 113 | .pInputAttachments = nullptr, | ||
| 114 | .colorAttachmentCount = static_cast<u32>(color_references.size()), | ||
| 115 | .pColorAttachments = color_references.data(), | ||
| 116 | .pResolveAttachments = nullptr, | ||
| 117 | .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr, | ||
| 118 | .preserveAttachmentCount = 0, | ||
| 119 | .pPreserveAttachments = nullptr, | ||
| 120 | }; | ||
| 121 | |||
| 122 | VkAccessFlags access = 0; | ||
| 123 | VkPipelineStageFlags stage = 0; | ||
| 124 | if (!color_references.empty()) { | ||
| 125 | access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; | ||
| 126 | stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; | ||
| 127 | } | ||
| 128 | |||
| 129 | if (has_zeta) { | ||
| 130 | access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||
| 131 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; | ||
| 132 | stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; | ||
| 133 | } | ||
| 134 | |||
| 135 | const VkSubpassDependency subpass_dependency{ | ||
| 136 | .srcSubpass = VK_SUBPASS_EXTERNAL, | ||
| 137 | .dstSubpass = 0, | ||
| 138 | .srcStageMask = stage, | ||
| 139 | .dstStageMask = stage, | ||
| 140 | .srcAccessMask = 0, | ||
| 141 | .dstAccessMask = access, | ||
| 142 | .dependencyFlags = 0, | ||
| 143 | }; | ||
| 144 | |||
| 145 | return device.GetLogical().CreateRenderPass({ | ||
| 146 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, | ||
| 147 | .pNext = nullptr, | ||
| 148 | .flags = 0, | ||
| 149 | .attachmentCount = static_cast<u32>(descriptors.size()), | ||
| 150 | .pAttachments = descriptors.data(), | ||
| 151 | .subpassCount = 1, | ||
| 152 | .pSubpasses = &subpass_description, | ||
| 153 | .dependencyCount = 1, | ||
| 154 | .pDependencies = &subpass_dependency, | ||
| 155 | }); | ||
| 156 | } | ||
| 157 | |||
| 158 | } // namespace Vulkan | ||
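The deleted GetRenderPass above is a textbook try_emplace memoization: lookup and insertion share one hash walk, and the render pass is only created on a miss. A generic sketch of the same idiom (names hypothetical):

    #include <unordered_map>

    // On a miss, try_emplace value-initializes the mapped value and reports
    // {iterator, true}; the value is then filled in place, exactly as the
    // deleted VKRenderPassCache::GetRenderPass did.
    template <typename Key, typename Value, typename Factory>
    Value& GetOrCreate(std::unordered_map<Key, Value>& cache, const Key& key,
                       Factory&& factory) {
        const auto [it, is_miss] = cache.try_emplace(key);
        if (is_miss) {
            it->second = factory(key);
        }
        return it->second;
    }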
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h deleted file mode 100644 index 652ecef7b..000000000 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ /dev/null | |||
| @@ -1,70 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <type_traits> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include <boost/container/static_vector.hpp> | ||
| 11 | #include <boost/functional/hash.hpp> | ||
| 12 | |||
| 13 | #include "video_core/engines/maxwell_3d.h" | ||
| 14 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 15 | #include "video_core/surface.h" | ||
| 16 | |||
| 17 | namespace Vulkan { | ||
| 18 | |||
| 19 | class VKDevice; | ||
| 20 | |||
| 21 | struct RenderPassParams { | ||
| 22 | std::array<u8, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_formats; | ||
| 23 | u8 num_color_attachments; | ||
| 24 | u8 texceptions; | ||
| 25 | |||
| 26 | u8 zeta_format; | ||
| 27 | u8 zeta_texception; | ||
| 28 | |||
| 29 | std::size_t Hash() const noexcept; | ||
| 30 | |||
| 31 | bool operator==(const RenderPassParams& rhs) const noexcept; | ||
| 32 | |||
| 33 | bool operator!=(const RenderPassParams& rhs) const noexcept { | ||
| 34 | return !operator==(rhs); | ||
| 35 | } | ||
| 36 | }; | ||
| 37 | static_assert(std::has_unique_object_representations_v<RenderPassParams>); | ||
| 38 | static_assert(std::is_trivially_copyable_v<RenderPassParams>); | ||
| 39 | static_assert(std::is_trivially_constructible_v<RenderPassParams>); | ||
| 40 | |||
| 41 | } // namespace Vulkan | ||
| 42 | |||
| 43 | namespace std { | ||
| 44 | |||
| 45 | template <> | ||
| 46 | struct hash<Vulkan::RenderPassParams> { | ||
| 47 | std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept { | ||
| 48 | return k.Hash(); | ||
| 49 | } | ||
| 50 | }; | ||
| 51 | |||
| 52 | } // namespace std | ||
| 53 | |||
| 54 | namespace Vulkan { | ||
| 55 | |||
| 56 | class VKRenderPassCache final { | ||
| 57 | public: | ||
| 58 | explicit VKRenderPassCache(const VKDevice& device_); | ||
| 59 | ~VKRenderPassCache(); | ||
| 60 | |||
| 61 | VkRenderPass GetRenderPass(const RenderPassParams& params); | ||
| 62 | |||
| 63 | private: | ||
| 64 | vk::RenderPass CreateRenderPass(const RenderPassParams& params) const; | ||
| 65 | |||
| 66 | const VKDevice& device; | ||
| 67 | std::unordered_map<RenderPassParams, vk::RenderPass> cache; | ||
| 68 | }; | ||
| 69 | |||
| 70 | } // namespace Vulkan | ||
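The three static_asserts in this deleted header are what justified hashing and comparing RenderPassParams by raw bytes: has_unique_object_representations guarantees no padding bits or alternative encodings, so equal values have identical object representations. A hedged illustration of the contract (types hypothetical):

    #include <cstddef>
    #include <cstring>
    #include <functional>
    #include <string_view>
    #include <type_traits>

    struct Key {
        unsigned a;
        unsigned b;
    };
    // Without this guarantee, padding bytes could differ between equal
    // values and break both memcmp equality and byte-wise hashing.
    static_assert(std::has_unique_object_representations_v<Key>);

    bool Equal(const Key& x, const Key& y) {
        return std::memcmp(&x, &y, sizeof(Key)) == 0;
    }

    std::size_t Hash(const Key& k) {
        // std::hash<std::string_view> stands in for Common::CityHash64 here.
        return std::hash<std::string_view>{}(
            std::string_view(reinterpret_cast<const char*>(&k), sizeof(Key)));
    }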
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp deleted file mode 100644 index b859691fa..000000000 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ /dev/null | |||
| @@ -1,83 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <unordered_map> | ||
| 6 | |||
| 7 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 8 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | ||
| 9 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 10 | #include "video_core/textures/texture.h" | ||
| 11 | |||
| 12 | using Tegra::Texture::TextureMipmapFilter; | ||
| 13 | |||
| 14 | namespace Vulkan { | ||
| 15 | |||
| 16 | namespace { | ||
| 17 | |||
| 18 | VkBorderColor ConvertBorderColor(std::array<float, 4> color) { | ||
| 19 | // TODO(Rodrigo): Manage integer border colors | ||
| 20 | if (color == std::array<float, 4>{0, 0, 0, 0}) { | ||
| 21 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; | ||
| 22 | } else if (color == std::array<float, 4>{0, 0, 0, 1}) { | ||
| 23 | return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; | ||
| 24 | } else if (color == std::array<float, 4>{1, 1, 1, 1}) { | ||
| 25 | return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; | ||
| 26 | } | ||
| 27 | if (color[0] + color[1] + color[2] > 1.35f) { | ||
| 28 | // If color elements are brighter than roughly 0.5 average, use white border | ||
| 29 | return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; | ||
| 30 | } else if (color[3] > 0.5f) { | ||
| 31 | return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; | ||
| 32 | } else { | ||
| 33 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | } // Anonymous namespace | ||
| 38 | |||
| 39 | VKSamplerCache::VKSamplerCache(const VKDevice& device_) : device{device_} {} | ||
| 40 | |||
| 41 | VKSamplerCache::~VKSamplerCache() = default; | ||
| 42 | |||
| 43 | vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { | ||
| 44 | const bool arbitrary_borders = device.IsExtCustomBorderColorSupported(); | ||
| 45 | const std::array color = tsc.GetBorderColor(); | ||
| 46 | |||
| 47 | VkSamplerCustomBorderColorCreateInfoEXT border{ | ||
| 48 | .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, | ||
| 49 | .pNext = nullptr, | ||
| 50 | .customBorderColor = {}, | ||
| 51 | .format = VK_FORMAT_UNDEFINED, | ||
| 52 | }; | ||
| 53 | std::memcpy(&border.customBorderColor, color.data(), sizeof(color)); | ||
| 54 | |||
| 55 | return device.GetLogical().CreateSampler({ | ||
| 56 | .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, | ||
| 57 | .pNext = arbitrary_borders ? &border : nullptr, | ||
| 58 | .flags = 0, | ||
| 59 | .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), | ||
| 60 | .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), | ||
| 61 | .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), | ||
| 62 | .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), | ||
| 63 | .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), | ||
| 64 | .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), | ||
| 65 | .mipLodBias = tsc.GetLodBias(), | ||
| 66 | .anisotropyEnable = | ||
| 67 | static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE), | ||
| 68 | .maxAnisotropy = tsc.GetMaxAnisotropy(), | ||
| 69 | .compareEnable = tsc.depth_compare_enabled, | ||
| 70 | .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), | ||
| 71 | .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(), | ||
| 72 | .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(), | ||
| 73 | .borderColor = | ||
| 74 | arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), | ||
| 75 | .unnormalizedCoordinates = VK_FALSE, | ||
| 76 | }); | ||
| 77 | } | ||
| 78 | |||
| 79 | VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const { | ||
| 80 | return *sampler; | ||
| 81 | } | ||
| 82 | |||
| 83 | } // namespace Vulkan | ||
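The deleted sampler code shows the usual VK_EXT_custom_border_color pattern: when the extension is available the exact color is chained through pNext, otherwise it is snapped to the nearest built-in enumerator. A hedged sketch of that decision (helper name hypothetical; for float border colors the matching custom enumerator is VK_BORDER_COLOR_FLOAT_CUSTOM_EXT):

    #include <array>
    #include <cstring>
    #include <vulkan/vulkan.h>

    // 'border' must outlive the sampler creation call since it is chained
    // through pNext. VK_FORMAT_UNDEFINED relies on the device feature
    // customBorderColorWithoutFormat being enabled.
    VkSamplerCreateInfo MakeSamplerInfo(bool has_custom_border_ext,
                                        const std::array<float, 4>& color,
                                        VkSamplerCustomBorderColorCreateInfoEXT& border,
                                        VkBorderColor fallback) {
        border = {
            .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
            .pNext = nullptr,
            .customBorderColor = {},
            .format = VK_FORMAT_UNDEFINED,
        };
        std::memcpy(&border.customBorderColor, color.data(), sizeof(float) * 4);

        VkSamplerCreateInfo info{};
        info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
        info.pNext = has_custom_border_ext ? &border : nullptr;
        info.borderColor =
            has_custom_border_ext ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : fallback;
        return info;
    }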
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h deleted file mode 100644 index 3f22c4610..000000000 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ /dev/null | |||
| @@ -1,29 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 8 | #include "video_core/sampler_cache.h" | ||
| 9 | #include "video_core/textures/texture.h" | ||
| 10 | |||
| 11 | namespace Vulkan { | ||
| 12 | |||
| 13 | class VKDevice; | ||
| 14 | |||
| 15 | class VKSamplerCache final : public VideoCommon::SamplerCache<VkSampler, vk::Sampler> { | ||
| 16 | public: | ||
| 17 | explicit VKSamplerCache(const VKDevice& device_); | ||
| 18 | ~VKSamplerCache(); | ||
| 19 | |||
| 20 | protected: | ||
| 21 | vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; | ||
| 22 | |||
| 23 | VkSampler ToSamplerType(const vk::Sampler& sampler) const override; | ||
| 24 | |||
| 25 | private: | ||
| 26 | const VKDevice& device; | ||
| 27 | }; | ||
| 28 | |||
| 29 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 1a483dc71..66004f9c0 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -11,12 +11,13 @@ | |||
| 11 | #include "common/microprofile.h" | 11 | #include "common/microprofile.h" |
| 12 | #include "common/thread.h" | 12 | #include "common/thread.h" |
| 13 | #include "video_core/renderer_vulkan/vk_command_pool.h" | 13 | #include "video_core/renderer_vulkan/vk_command_pool.h" |
| 14 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | 14 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" |
| 16 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 15 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 16 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 18 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | 17 | #include "video_core/renderer_vulkan/vk_state_tracker.h" |
| 19 | #include "video_core/renderer_vulkan/wrapper.h" | 18 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 19 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 20 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 20 | 21 | ||
| 21 | namespace Vulkan { | 22 | namespace Vulkan { |
| 22 | 23 | ||
| @@ -36,7 +37,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { | |||
| 36 | last = nullptr; | 37 | last = nullptr; |
| 37 | } | 38 | } |
| 38 | 39 | ||
| 39 | VKScheduler::VKScheduler(const VKDevice& device_, StateTracker& state_tracker_) | 40 | VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) |
| 40 | : device{device_}, state_tracker{state_tracker_}, | 41 | : device{device_}, state_tracker{state_tracker_}, |
| 41 | master_semaphore{std::make_unique<MasterSemaphore>(device)}, | 42 | master_semaphore{std::make_unique<MasterSemaphore>(device)}, |
| 42 | command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { | 43 | command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { |
| @@ -96,38 +97,39 @@ void VKScheduler::DispatchWork() { | |||
| 96 | AcquireNewChunk(); | 97 | AcquireNewChunk(); |
| 97 | } | 98 | } |
| 98 | 99 | ||
| 99 | void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, | 100 | void VKScheduler::RequestRenderpass(const Framebuffer* framebuffer) { |
| 100 | VkExtent2D render_area) { | 101 | const VkRenderPass renderpass = framebuffer->RenderPass(); |
| 101 | if (renderpass == state.renderpass && framebuffer == state.framebuffer && | 102 | const VkFramebuffer framebuffer_handle = framebuffer->Handle(); |
| 103 | const VkExtent2D render_area = framebuffer->RenderArea(); | ||
| 104 | if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer && | ||
| 102 | render_area.width == state.render_area.width && | 105 | render_area.width == state.render_area.width && |
| 103 | render_area.height == state.render_area.height) { | 106 | render_area.height == state.render_area.height) { |
| 104 | return; | 107 | return; |
| 105 | } | 108 | } |
| 106 | const bool end_renderpass = state.renderpass != nullptr; | 109 | EndRenderPass(); |
| 107 | state.renderpass = renderpass; | 110 | state.renderpass = renderpass; |
| 108 | state.framebuffer = framebuffer; | 111 | state.framebuffer = framebuffer_handle; |
| 109 | state.render_area = render_area; | 112 | state.render_area = render_area; |
| 110 | 113 | ||
| 111 | const VkRenderPassBeginInfo renderpass_bi{ | 114 | Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) { |
| 112 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, | 115 | const VkRenderPassBeginInfo renderpass_bi{ |
| 113 | .pNext = nullptr, | 116 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, |
| 114 | .renderPass = renderpass, | 117 | .pNext = nullptr, |
| 115 | .framebuffer = framebuffer, | 118 | .renderPass = renderpass, |
| 116 | .renderArea = | 119 | .framebuffer = framebuffer_handle, |
| 117 | { | 120 | .renderArea = |
| 118 | .offset = {.x = 0, .y = 0}, | 121 | { |
| 119 | .extent = render_area, | 122 | .offset = {.x = 0, .y = 0}, |
| 120 | }, | 123 | .extent = render_area, |
| 121 | .clearValueCount = 0, | 124 | }, |
| 122 | .pClearValues = nullptr, | 125 | .clearValueCount = 0, |
| 123 | }; | 126 | .pClearValues = nullptr, |
| 124 | 127 | }; | |
| 125 | Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) { | ||
| 126 | if (end_renderpass) { | ||
| 127 | cmdbuf.EndRenderPass(); | ||
| 128 | } | ||
| 129 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); | 128 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); |
| 130 | }); | 129 | }); |
| 130 | num_renderpass_images = framebuffer->NumImages(); | ||
| 131 | renderpass_images = framebuffer->Images(); | ||
| 132 | renderpass_image_ranges = framebuffer->ImageRanges(); | ||
| 131 | } | 133 | } |
| 132 | 134 | ||
| 133 | void VKScheduler::RequestOutsideRenderPassOperationContext() { | 135 | void VKScheduler::RequestOutsideRenderPassOperationContext() { |
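The reworked RequestRenderpass captures the begin-info ingredients by value and defers the actual BeginRenderPass into Record, so recording happens later on the scheduler's worker thread. A simplified stand-in for that deferred-recording pattern (not the real VKScheduler; the int token stands in for vk::CommandBuffer):

    #include <functional>
    #include <queue>
    #include <utility>

    class TinyScheduler {
    public:
        // Capture by value: the lambda must not reference caller stack
        // memory, because it runs later on the worker thread.
        template <typename F>
        void Record(F&& func) {
            commands.emplace(std::forward<F>(func));
        }

        void Flush(int cmdbuf) {
            while (!commands.empty()) {
                commands.front()(cmdbuf);
                commands.pop();
            }
        }

    private:
        std::queue<std::function<void(int)>> commands;
    };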
| @@ -241,8 +243,37 @@ void VKScheduler::EndRenderPass() { | |||
| 241 | if (!state.renderpass) { | 243 | if (!state.renderpass) { |
| 242 | return; | 244 | return; |
| 243 | } | 245 | } |
| 246 | Record([num_images = num_renderpass_images, images = renderpass_images, | ||
| 247 | ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) { | ||
| 248 | std::array<VkImageMemoryBarrier, 9> barriers; | ||
| 249 | for (size_t i = 0; i < num_images; ++i) { | ||
| 250 | barriers[i] = VkImageMemoryBarrier{ | ||
| 251 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 252 | .pNext = nullptr, | ||
| 253 | .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||
| 254 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, | ||
| 255 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | | ||
| 256 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | ||
| 257 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||
| 258 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||
| 259 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, | ||
| 260 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 261 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 262 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 263 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 264 | .image = images[i], | ||
| 265 | .subresourceRange = ranges[i], | ||
| 266 | }; | ||
| 267 | } | ||
| 268 | cmdbuf.EndRenderPass(); | ||
| 269 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | | ||
| 270 | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | | ||
| 271 | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, | ||
| 272 | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, | ||
| 273 | vk::Span(barriers.data(), num_images)); | ||
| 274 | }); | ||
| 244 | state.renderpass = nullptr; | 275 | state.renderpass = nullptr; |
| 245 | Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); }); | 276 | num_renderpass_images = 0; |
| 246 | } | 277 | } |
| 247 | 278 | ||
| 248 | void VKScheduler::AcquireNewChunk() { | 279 | void VKScheduler::AcquireNewChunk() { |
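The new EndRenderPass batches up to nine image barriers in a stack array and submits only the filled prefix through vk::Span(barriers.data(), num_images), trading a per-pass heap allocation for fixed storage. The same shape in standard C++ (std::span stands in for yuzu's vk::Span):

    #include <array>
    #include <cstddef>
    #include <span>

    struct Barrier {
        int image;
    };

    // Fill a fixed-capacity array and hand out only the used prefix.
    std::span<const Barrier> FillBarriers(std::array<Barrier, 9>& storage,
                                          std::size_t num_images) {
        for (std::size_t i = 0; i < num_images; ++i) {
            storage[i] = Barrier{static_cast<int>(i)};
        }
        return {storage.data(), num_images};
    }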
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 6d3a5da0b..4cd43e425 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -12,21 +12,22 @@ | |||
| 12 | #include <utility> | 12 | #include <utility> |
| 13 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "common/threadsafe_queue.h" | 14 | #include "common/threadsafe_queue.h" |
| 15 | #include "video_core/renderer_vulkan/wrapper.h" | 15 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 16 | 16 | ||
| 17 | namespace Vulkan { | 17 | namespace Vulkan { |
| 18 | 18 | ||
| 19 | class CommandPool; | 19 | class CommandPool; |
| 20 | class Device; | ||
| 21 | class Framebuffer; | ||
| 20 | class MasterSemaphore; | 22 | class MasterSemaphore; |
| 21 | class StateTracker; | 23 | class StateTracker; |
| 22 | class VKDevice; | ||
| 23 | class VKQueryCache; | 24 | class VKQueryCache; |
| 24 | 25 | ||
| 25 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do | 26 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do |
| 26 | /// OpenGL-like operations on Vulkan command buffers. | 27 | /// OpenGL-like operations on Vulkan command buffers. |
| 27 | class VKScheduler { | 28 | class VKScheduler { |
| 28 | public: | 29 | public: |
| 29 | explicit VKScheduler(const VKDevice& device, StateTracker& state_tracker); | 30 | explicit VKScheduler(const Device& device, StateTracker& state_tracker); |
| 30 | ~VKScheduler(); | 31 | ~VKScheduler(); |
| 31 | 32 | ||
| 32 | /// Returns the current command buffer tick. | 33 | /// Returns the current command buffer tick. |
| @@ -52,8 +53,7 @@ public: | |||
| 52 | void DispatchWork(); | 53 | void DispatchWork(); |
| 53 | 54 | ||
| 54 | /// Requests to begin a renderpass. | 55 | /// Requests to begin a renderpass. |
| 55 | void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, | 56 | void RequestRenderpass(const Framebuffer* framebuffer); |
| 56 | VkExtent2D render_area); | ||
| 57 | 57 | ||
| 58 | /// Requests the current execution context to be able to execute operations only allowed outside | 58 |

| 59 | /// of a renderpass. | 59 | /// of a renderpass. |
| @@ -62,6 +62,9 @@ public: | |||
| 62 | /// Binds a pipeline to the current execution context. | 62 | /// Binds a pipeline to the current execution context. |
| 63 | void BindGraphicsPipeline(VkPipeline pipeline); | 63 | void BindGraphicsPipeline(VkPipeline pipeline); |
| 64 | 64 | ||
| 65 | /// Invalidates current command buffer state except for render passes | ||
| 66 | void InvalidateState(); | ||
| 67 | |||
| 65 | /// Assigns the query cache. | 68 | /// Assigns the query cache. |
| 66 | void SetQueryCache(VKQueryCache& query_cache_) { | 69 | void SetQueryCache(VKQueryCache& query_cache_) { |
| 67 | query_cache = &query_cache_; | 70 | query_cache = &query_cache_; |
| @@ -170,15 +173,13 @@ private: | |||
| 170 | 173 | ||
| 171 | void AllocateNewContext(); | 174 | void AllocateNewContext(); |
| 172 | 175 | ||
| 173 | void InvalidateState(); | ||
| 174 | |||
| 175 | void EndPendingOperations(); | 176 | void EndPendingOperations(); |
| 176 | 177 | ||
| 177 | void EndRenderPass(); | 178 | void EndRenderPass(); |
| 178 | 179 | ||
| 179 | void AcquireNewChunk(); | 180 | void AcquireNewChunk(); |
| 180 | 181 | ||
| 181 | const VKDevice& device; | 182 | const Device& device; |
| 182 | StateTracker& state_tracker; | 183 | StateTracker& state_tracker; |
| 183 | 184 | ||
| 184 | std::unique_ptr<MasterSemaphore> master_semaphore; | 185 | std::unique_ptr<MasterSemaphore> master_semaphore; |
| @@ -192,6 +193,11 @@ private: | |||
| 192 | std::thread worker_thread; | 193 | std::thread worker_thread; |
| 193 | 194 | ||
| 194 | State state; | 195 | State state; |
| 196 | |||
| 197 | u32 num_renderpass_images = 0; | ||
| 198 | std::array<VkImage, 9> renderpass_images{}; | ||
| 199 | std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{}; | ||
| 200 | |||
| 195 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; | 201 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; |
| 196 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; | 202 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; |
| 197 | std::mutex mutex; | 203 | std::mutex mutex; |
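The fixed bound of 9 on the new renderpass_images arrays presumably mirrors Maxwell's 8 color render targets plus one depth/stencil (zeta) attachment, which also matches the 9-entry ATTACHMENT_REFERENCES table in the texture cache further below. Spelled out (an assumption, not stated in this diff):

    #include <cstddef>

    // Assumption: 8 color render targets + 1 depth/stencil attachment.
    constexpr std::size_t NUM_RT = 8;
    constexpr std::size_t MAX_RENDERPASS_IMAGES = NUM_RT + 1;
    static_assert(MAX_RENDERPASS_IMAGES == 9);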
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 72954d0e3..89cbe01ad 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -22,11 +22,11 @@ | |||
| 22 | #include "video_core/engines/shader_bytecode.h" | 22 | #include "video_core/engines/shader_bytecode.h" |
| 23 | #include "video_core/engines/shader_header.h" | 23 | #include "video_core/engines/shader_header.h" |
| 24 | #include "video_core/engines/shader_type.h" | 24 | #include "video_core/engines/shader_type.h" |
| 25 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 25 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 27 | #include "video_core/shader/node.h" | 26 | #include "video_core/shader/node.h" |
| 28 | #include "video_core/shader/shader_ir.h" | 27 | #include "video_core/shader/shader_ir.h" |
| 29 | #include "video_core/shader/transform_feedback.h" | 28 | #include "video_core/shader/transform_feedback.h" |
| 29 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 30 | 30 | ||
| 31 | namespace Vulkan { | 31 | namespace Vulkan { |
| 32 | 32 | ||
| @@ -102,7 +102,7 @@ struct GenericVaryingDescription { | |||
| 102 | bool is_scalar = false; | 102 | bool is_scalar = false; |
| 103 | }; | 103 | }; |
| 104 | 104 | ||
| 105 | spv::Dim GetSamplerDim(const Sampler& sampler) { | 105 | spv::Dim GetSamplerDim(const SamplerEntry& sampler) { |
| 106 | ASSERT(!sampler.is_buffer); | 106 | ASSERT(!sampler.is_buffer); |
| 107 | switch (sampler.type) { | 107 | switch (sampler.type) { |
| 108 | case Tegra::Shader::TextureType::Texture1D: | 108 | case Tegra::Shader::TextureType::Texture1D: |
| @@ -119,7 +119,7 @@ spv::Dim GetSamplerDim(const Sampler& sampler) { | |||
| 119 | } | 119 | } |
| 120 | } | 120 | } |
| 121 | 121 | ||
| 122 | std::pair<spv::Dim, bool> GetImageDim(const Image& image) { | 122 | std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) { |
| 123 | switch (image.type) { | 123 | switch (image.type) { |
| 124 | case Tegra::Shader::ImageType::Texture1D: | 124 | case Tegra::Shader::ImageType::Texture1D: |
| 125 | return {spv::Dim::Dim1D, false}; | 125 | return {spv::Dim::Dim1D, false}; |
| @@ -272,19 +272,12 @@ bool IsPrecise(Operation operand) { | |||
| 272 | return false; | 272 | return false; |
| 273 | } | 273 | } |
| 274 | 274 | ||
| 275 | u32 ShaderVersion(const VKDevice& device) { | ||
| 276 | if (device.InstanceApiVersion() < VK_API_VERSION_1_1) { | ||
| 277 | return 0x00010000; | ||
| 278 | } | ||
| 279 | return 0x00010300; | ||
| 280 | } | ||
| 281 | |||
| 282 | class SPIRVDecompiler final : public Sirit::Module { | 275 | class SPIRVDecompiler final : public Sirit::Module { |
| 283 | public: | 276 | public: |
| 284 | explicit SPIRVDecompiler(const VKDevice& device_, const ShaderIR& ir_, ShaderType stage_, | 277 | explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_, |
| 285 | const Registry& registry_, const Specialization& specialization_) | 278 | const Registry& registry_, const Specialization& specialization_) |
| 286 | : Module(ShaderVersion(device_)), device{device_}, ir{ir_}, stage{stage_}, | 279 | : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()}, |
| 287 | header{ir_.GetHeader()}, registry{registry_}, specialization{specialization_} { | 280 | registry{registry_}, specialization{specialization_} { |
| 288 | if (stage_ != ShaderType::Compute) { | 281 | if (stage_ != ShaderType::Compute) { |
| 289 | transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo()); | 282 | transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo()); |
| 290 | } | 283 | } |
| @@ -980,7 +973,7 @@ private: | |||
| 980 | return binding; | 973 | return binding; |
| 981 | } | 974 | } |
| 982 | 975 | ||
| 983 | void DeclareImage(const Image& image, u32& binding) { | 976 | void DeclareImage(const ImageEntry& image, u32& binding) { |
| 984 | const auto [dim, arrayed] = GetImageDim(image); | 977 | const auto [dim, arrayed] = GetImageDim(image); |
| 985 | constexpr int depth = 0; | 978 | constexpr int depth = 0; |
| 986 | constexpr bool ms = false; | 979 | constexpr bool ms = false; |
| @@ -2749,7 +2742,7 @@ private: | |||
| 2749 | }; | 2742 | }; |
| 2750 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2743 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2751 | 2744 | ||
| 2752 | const VKDevice& device; | 2745 | const Device& device; |
| 2753 | const ShaderIR& ir; | 2746 | const ShaderIR& ir; |
| 2754 | const ShaderType stage; | 2747 | const ShaderType stage; |
| 2755 | const Tegra::Shader::Header header; | 2748 | const Tegra::Shader::Header header; |
| @@ -3137,7 +3130,7 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 3137 | return entries; | 3130 | return entries; |
| 3138 | } | 3131 | } |
| 3139 | 3132 | ||
| 3140 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | 3133 | std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 3141 | ShaderType stage, const VideoCommon::Shader::Registry& registry, | 3134 | ShaderType stage, const VideoCommon::Shader::Registry& registry, |
| 3142 | const Specialization& specialization) { | 3135 | const Specialization& specialization) { |
| 3143 | return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); | 3136 | return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); |
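SPIRVDecompiler's base class now always receives 0x00010300, the SPIR-V header word for version 1.3 (layout 0x00MMmm00: major in bits 16..23, minor in bits 8..15); the deleted ShaderVersion helper that fell back to 1.0 on pre-1.1 Vulkan instances is gone. A small check of the encoding:

    #include <cstdint>

    // SPIR-V version word layout: 0x00MMmm00 (MM = major, mm = minor).
    constexpr std::uint32_t MakeSpirvVersion(std::uint32_t major, std::uint32_t minor) {
        return (major << 16) | (minor << 8);
    }
    static_assert(MakeSpirvVersion(1, 0) == 0x00010000);
    static_assert(MakeSpirvVersion(1, 3) == 0x00010300);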
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index df1812514..26381e444 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -15,16 +15,14 @@ | |||
| 15 | #include "video_core/shader/shader_ir.h" | 15 | #include "video_core/shader/shader_ir.h" |
| 16 | 16 | ||
| 17 | namespace Vulkan { | 17 | namespace Vulkan { |
| 18 | class VKDevice; | ||
| 19 | } | ||
| 20 | 18 | ||
| 21 | namespace Vulkan { | 19 | class Device; |
| 22 | 20 | ||
| 23 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 21 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 24 | using UniformTexelEntry = VideoCommon::Shader::Sampler; | 22 | using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; |
| 25 | using SamplerEntry = VideoCommon::Shader::Sampler; | 23 | using SamplerEntry = VideoCommon::Shader::SamplerEntry; |
| 26 | using StorageTexelEntry = VideoCommon::Shader::Image; | 24 | using StorageTexelEntry = VideoCommon::Shader::ImageEntry; |
| 27 | using ImageEntry = VideoCommon::Shader::Image; | 25 | using ImageEntry = VideoCommon::Shader::ImageEntry; |
| 28 | 26 | ||
| 29 | constexpr u32 DESCRIPTOR_SET = 0; | 27 | constexpr u32 DESCRIPTOR_SET = 0; |
| 30 | 28 | ||
| @@ -109,7 +107,7 @@ struct SPIRVShader { | |||
| 109 | 107 | ||
| 110 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); | 108 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); |
| 111 | 109 | ||
| 112 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | 110 | std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 113 | Tegra::Engines::ShaderType stage, | 111 | Tegra::Engines::ShaderType stage, |
| 114 | const VideoCommon::Shader::Registry& registry, | 112 | const VideoCommon::Shader::Registry& registry, |
| 115 | const Specialization& specialization); | 113 | const Specialization& specialization); |
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index c1a218d76..aaad4f292 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp | |||
| @@ -7,24 +7,19 @@ | |||
| 7 | 7 | ||
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_shader_util.h" | 10 | #include "video_core/renderer_vulkan/vk_shader_util.h" |
| 12 | #include "video_core/renderer_vulkan/wrapper.h" | 11 | #include "video_core/vulkan_common/vulkan_device.h" |
| 12 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 13 | 13 | ||
| 14 | namespace Vulkan { | 14 | namespace Vulkan { |
| 15 | 15 | ||
| 16 | vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { | 16 | vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code) { |
| 17 | // Avoid undefined behavior by copying to a staging allocation | ||
| 18 | ASSERT(code_size % sizeof(u32) == 0); | ||
| 19 | const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); | ||
| 20 | std::memcpy(data.get(), code_data, code_size); | ||
| 21 | |||
| 22 | return device.GetLogical().CreateShaderModule({ | 17 | return device.GetLogical().CreateShaderModule({ |
| 23 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | 18 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| 24 | .pNext = nullptr, | 19 | .pNext = nullptr, |
| 25 | .flags = 0, | 20 | .flags = 0, |
| 26 | .codeSize = code_size, | 21 | .codeSize = static_cast<u32>(code.size_bytes()), |
| 27 | .pCode = data.get(), | 22 | .pCode = code.data(), |
| 28 | }); | 23 | }); |
| 29 | } | 24 | } |
| 30 | 25 | ||
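With the std::span signature, the staging copy and the size % sizeof(u32) assert disappear: a u32 container is already correctly aligned, and codeSize remains in bytes via size_bytes(). A hedged call-site sketch (the vector is assumed to come from Decompile, whose signature appears earlier in this diff):

    // std::vector<u32> converts implicitly to std::span<const u32>.
    const std::vector<u32> spirv =
        Decompile(device, ir, stage, registry, specialization);
    vk::ShaderModule module = BuildShader(device, spirv);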
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index d1d3f3cae..9517cbe84 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h | |||
| @@ -4,13 +4,15 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <span> | ||
| 8 | |||
| 7 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 8 | #include "video_core/renderer_vulkan/wrapper.h" | 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 9 | 11 | ||
| 10 | namespace Vulkan { | 12 | namespace Vulkan { |
| 11 | 13 | ||
| 12 | class VKDevice; | 14 | class Device; |
| 13 | 15 | ||
| 14 | vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); | 16 | vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code); |
| 15 | 17 | ||
| 16 | } // namespace Vulkan | 18 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 2fd3b7f39..1e0b8b922 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | |||
| @@ -9,17 +9,17 @@ | |||
| 9 | 9 | ||
| 10 | #include "common/bit_util.h" | 10 | #include "common/bit_util.h" |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 12 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 14 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 13 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 15 | #include "video_core/renderer_vulkan/wrapper.h" | 14 | #include "video_core/vulkan_common/vulkan_device.h" |
| 15 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 16 | 16 | ||
| 17 | namespace Vulkan { | 17 | namespace Vulkan { |
| 18 | 18 | ||
| 19 | VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer_) | 19 | VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer_) |
| 20 | : buffer{std::move(buffer_)} {} | 20 | : buffer{std::move(buffer_)} {} |
| 21 | 21 | ||
| 22 | VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device_, VKMemoryManager& memory_manager_, | 22 | VKStagingBufferPool::VKStagingBufferPool(const Device& device_, VKMemoryManager& memory_manager_, |
| 23 | VKScheduler& scheduler_) | 23 | VKScheduler& scheduler_) |
| 24 | : device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {} | 24 | : device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {} |
| 25 | 25 | ||
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 2dd5049ac..90dadcbbe 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | |||
| @@ -10,11 +10,11 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | 11 | ||
| 12 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 12 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 13 | #include "video_core/renderer_vulkan/wrapper.h" | 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 14 | 14 | ||
| 15 | namespace Vulkan { | 15 | namespace Vulkan { |
| 16 | 16 | ||
| 17 | class VKDevice; | 17 | class Device; |
| 18 | class VKScheduler; | 18 | class VKScheduler; |
| 19 | 19 | ||
| 20 | struct VKBuffer final { | 20 | struct VKBuffer final { |
| @@ -24,7 +24,7 @@ struct VKBuffer final { | |||
| 24 | 24 | ||
| 25 | class VKStagingBufferPool final { | 25 | class VKStagingBufferPool final { |
| 26 | public: | 26 | public: |
| 27 | explicit VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager, | 27 | explicit VKStagingBufferPool(const Device& device, VKMemoryManager& memory_manager, |
| 28 | VKScheduler& scheduler); | 28 | VKScheduler& scheduler); |
| 29 | ~VKStagingBufferPool(); | 29 | ~VKStagingBufferPool(); |
| 30 | 30 | ||
| @@ -58,7 +58,7 @@ private: | |||
| 58 | 58 | ||
| 59 | u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2); | 59 | u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2); |
| 60 | 60 | ||
| 61 | const VKDevice& device; | 61 | const Device& device; |
| 62 | VKMemoryManager& memory_manager; | 62 | VKMemoryManager& memory_manager; |
| 63 | VKScheduler& scheduler; | 63 | VKScheduler& scheduler; |
| 64 | 64 | ||
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 50164cc08..1779a2e30 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | ||
| 6 | #include <cstddef> | 7 | #include <cstddef> |
| 7 | #include <iterator> | 8 | #include <iterator> |
| 8 | 9 | ||
| @@ -29,21 +30,15 @@ using Table = Maxwell3D::DirtyState::Table; | |||
| 29 | using Flags = Maxwell3D::DirtyState::Flags; | 30 | using Flags = Maxwell3D::DirtyState::Flags; |
| 30 | 31 | ||
| 31 | Flags MakeInvalidationFlags() { | 32 | Flags MakeInvalidationFlags() { |
| 33 | static constexpr std::array INVALIDATION_FLAGS{ | ||
| 34 | Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, | ||
| 35 | StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, | ||
| 36 | DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, | ||
| 37 | }; | ||
| 32 | Flags flags{}; | 38 | Flags flags{}; |
| 33 | flags[Viewports] = true; | 39 | for (const int flag : INVALIDATION_FLAGS) { |
| 34 | flags[Scissors] = true; | 40 | flags[flag] = true; |
| 35 | flags[DepthBias] = true; | 41 | } |
| 36 | flags[BlendConstants] = true; | ||
| 37 | flags[DepthBounds] = true; | ||
| 38 | flags[StencilProperties] = true; | ||
| 39 | flags[CullMode] = true; | ||
| 40 | flags[DepthBoundsEnable] = true; | ||
| 41 | flags[DepthTestEnable] = true; | ||
| 42 | flags[DepthWriteEnable] = true; | ||
| 43 | flags[DepthCompareOp] = true; | ||
| 44 | flags[FrontFace] = true; | ||
| 45 | flags[StencilOp] = true; | ||
| 46 | flags[StencilTestEnable] = true; | ||
| 47 | return flags; | 42 | return flags; |
| 48 | } | 43 | } |
| 49 | 44 | ||
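MakeInvalidationFlags now drives its fourteen flag assignments from one constexpr table instead of hand-written lines; the same shape in miniature with std::bitset (enum values hypothetical):

    #include <array>
    #include <bitset>

    enum Flag : int { Viewports, Scissors, DepthBias, NumFlags };

    std::bitset<NumFlags> MakeInvalidationFlags() {
        static constexpr std::array FLAGS{Viewports, Scissors, DepthBias};
        std::bitset<NumFlags> flags;
        for (const int flag : FLAGS) {
            flags[flag] = true;
        }
        return flags;
    }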
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 1de789e57..c335d2bdf 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h | |||
| @@ -52,6 +52,14 @@ public: | |||
| 52 | current_topology = INVALID_TOPOLOGY; | 52 | current_topology = INVALID_TOPOLOGY; |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | void InvalidateViewports() { | ||
| 56 | flags[Dirty::Viewports] = true; | ||
| 57 | } | ||
| 58 | |||
| 59 | void InvalidateScissors() { | ||
| 60 | flags[Dirty::Scissors] = true; | ||
| 61 | } | ||
| 62 | |||
| 55 | bool TouchViewports() { | 63 | bool TouchViewports() { |
| 56 | return Exchange(Dirty::Viewports, false); | 64 | return Exchange(Dirty::Viewports, false); |
| 57 | } | 65 | } |
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 1b59612b9..a09fe084e 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp | |||
| @@ -10,15 +10,19 @@ | |||
| 10 | 10 | ||
| 11 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| 12 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 13 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 13 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 15 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | 14 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" |
| 16 | #include "video_core/renderer_vulkan/wrapper.h" | 15 | #include "video_core/vulkan_common/vulkan_device.h" |
| 16 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 17 | 17 | ||
| 18 | namespace Vulkan { | 18 | namespace Vulkan { |
| 19 | 19 | ||
| 20 | namespace { | 20 | namespace { |
| 21 | 21 | ||
| 22 | constexpr VkBufferUsageFlags BUFFER_USAGE = | ||
| 23 | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | | ||
| 24 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; | ||
| 25 | |||
| 22 | constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; | 26 | constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; |
| 23 | constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; | 27 | constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; |
| 24 | 28 | ||
| @@ -56,17 +60,16 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties, | |||
| 56 | 60 | ||
| 57 | } // Anonymous namespace | 61 | } // Anonymous namespace |
| 58 | 62 | ||
| 59 | VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_, | 63 | VKStreamBuffer::VKStreamBuffer(const Device& device_, VKScheduler& scheduler_) |
| 60 | VkBufferUsageFlags usage) | ||
| 61 | : device{device_}, scheduler{scheduler_} { | 64 | : device{device_}, scheduler{scheduler_} { |
| 62 | CreateBuffers(usage); | 65 | CreateBuffers(); |
| 63 | ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); | 66 | ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); |
| 64 | ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); | 67 | ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); |
| 65 | } | 68 | } |
| 66 | 69 | ||
| 67 | VKStreamBuffer::~VKStreamBuffer() = default; | 70 | VKStreamBuffer::~VKStreamBuffer() = default; |
| 68 | 71 | ||
| 69 | std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { | 72 | std::pair<u8*, u64> VKStreamBuffer::Map(u64 size, u64 alignment) { |
| 70 | ASSERT(size <= stream_buffer_size); | 73 | ASSERT(size <= stream_buffer_size); |
| 71 | mapped_size = size; | 74 | mapped_size = size; |
| 72 | 75 | ||
| @@ -76,7 +79,6 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { | |||
| 76 | 79 | ||
| 77 | WaitPendingOperations(offset); | 80 | WaitPendingOperations(offset); |
| 78 | 81 | ||
| 79 | bool invalidated = false; | ||
| 80 | if (offset + size > stream_buffer_size) { | 82 | if (offset + size > stream_buffer_size) { |
| 81 | // The buffer would overflow, save the amount of used watches and reset the state. | 83 | // The buffer would overflow, save the amount of used watches and reset the state. |
| 82 | invalidation_mark = current_watch_cursor; | 84 | invalidation_mark = current_watch_cursor; |
| @@ -90,11 +92,9 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { | |||
| 90 | 92 | ||
| 91 | // Ensure that we don't wait for uncommitted fences. | 93 | // Ensure that we don't wait for uncommitted fences. |
| 92 | scheduler.Flush(); | 94 | scheduler.Flush(); |
| 93 | |||
| 94 | invalidated = true; | ||
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | return {memory.Map(offset, size), offset, invalidated}; | 97 | return std::make_pair(memory.Map(offset, size), offset); |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | void VKStreamBuffer::Unmap(u64 size) { | 100 | void VKStreamBuffer::Unmap(u64 size) { |
| @@ -113,7 +113,7 @@ void VKStreamBuffer::Unmap(u64 size) { | |||
| 113 | watch.tick = scheduler.CurrentTick(); | 113 | watch.tick = scheduler.CurrentTick(); |
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { | 116 | void VKStreamBuffer::CreateBuffers() { |
| 117 | const auto memory_properties = device.GetPhysical().GetMemoryProperties(); | 117 | const auto memory_properties = device.GetPhysical().GetMemoryProperties(); |
| 118 | const u32 preferred_type = GetMemoryType(memory_properties); | 118 | const u32 preferred_type = GetMemoryType(memory_properties); |
| 119 | const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; | 119 | const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; |
| @@ -127,7 +127,7 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { | |||
| 127 | .pNext = nullptr, | 127 | .pNext = nullptr, |
| 128 | .flags = 0, | 128 | .flags = 0, |
| 129 | .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), | 129 | .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), |
| 130 | .usage = usage, | 130 | .usage = BUFFER_USAGE, |
| 131 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 131 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 132 | .queueFamilyIndexCount = 0, | 132 | .queueFamilyIndexCount = 0, |
| 133 | .pQueueFamilyIndices = nullptr, | 133 | .pQueueFamilyIndices = nullptr, |
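Map now returns just {pointer, offset}; with the invalidation boolean gone, a typical upload is map, copy, unmap, then bind at the returned offset. A hedged sketch of the expected calling sequence (helper name and alignment hypothetical):

    #include <cstring>

    // u64 is yuzu's alias for std::uint64_t.
    u64 Upload(VKStreamBuffer& stream_buffer, const void* data, u64 size) {
        const auto [pointer, offset] = stream_buffer.Map(size, /*alignment=*/4);
        std::memcpy(pointer, data, size);
        stream_buffer.Unmap(size);
        return offset; // bound later as the buffer offset for the draw
    }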
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 5e15ad78f..2e9c8cb46 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h | |||
| @@ -5,31 +5,29 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <optional> | 7 | #include <optional> |
| 8 | #include <tuple> | 8 | #include <utility> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/renderer_vulkan/wrapper.h" | 12 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 13 | 13 | ||
| 14 | namespace Vulkan { | 14 | namespace Vulkan { |
| 15 | 15 | ||
| 16 | class VKDevice; | 16 | class Device; |
| 17 | class VKFenceWatch; | 17 | class VKFenceWatch; |
| 18 | class VKScheduler; | 18 | class VKScheduler; |
| 19 | 19 | ||
| 20 | class VKStreamBuffer final { | 20 | class VKStreamBuffer final { |
| 21 | public: | 21 | public: |
| 22 | explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, | 22 | explicit VKStreamBuffer(const Device& device, VKScheduler& scheduler); |
| 23 | VkBufferUsageFlags usage); | ||
| 24 | ~VKStreamBuffer(); | 23 | ~VKStreamBuffer(); |
| 25 | 24 | ||
| 26 | /** | 25 | /** |
| 27 | * Reserves a region of memory from the stream buffer. | 26 | * Reserves a region of memory from the stream buffer. |
| 28 | * @param size Size to reserve. | 27 | * @param size Size to reserve. |
| 29 | * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer | 28 | * @returns A pair of a raw memory pointer (with offset added) and the buffer offset |
| 30 | * offset and a boolean that's true when buffer has been invalidated. | ||
| 31 | */ | 29 | */ |
| 32 | std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment); | 30 | std::pair<u8*, u64> Map(u64 size, u64 alignment); |
| 33 | 31 | ||
| 34 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. | 32 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. |
| 35 | void Unmap(u64 size); | 33 | void Unmap(u64 size); |
| @@ -49,14 +47,14 @@ private: | |||
| 49 | }; | 47 | }; |
| 50 | 48 | ||
| 51 | /// Creates Vulkan buffer handles committing the required memory. | 49 |
| 52 | void CreateBuffers(VkBufferUsageFlags usage); | 50 | void CreateBuffers(); |
| 53 | 51 | ||
| 54 | /// Increases the amount of watches available. | 52 | /// Increases the amount of watches available. |
| 55 | void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); | 53 | void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); |
| 56 | 54 | ||
| 57 | void WaitPendingOperations(u64 requested_upper_bound); | 55 | void WaitPendingOperations(u64 requested_upper_bound); |
| 58 | 56 | ||
| 59 | const VKDevice& device; ///< Vulkan device manager. | 57 | const Device& device; ///< Vulkan device manager. |
| 60 | VKScheduler& scheduler; ///< Command scheduler. | 58 | VKScheduler& scheduler; ///< Command scheduler. |
| 61 | 59 | ||
| 62 | vk::Buffer buffer; ///< Mapped buffer. | 60 | vk::Buffer buffer; ///< Mapped buffer. |
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 9636a7c65..725a2a05d 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp | |||
| @@ -11,10 +11,10 @@ | |||
| 11 | #include "common/logging/log.h" | 11 | #include "common/logging/log.h" |
| 12 | #include "core/core.h" | 12 | #include "core/core.h" |
| 13 | #include "core/frontend/framebuffer_layout.h" | 13 | #include "core/frontend/framebuffer_layout.h" |
| 14 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 16 | #include "video_core/renderer_vulkan/vk_swapchain.h" | 15 | #include "video_core/renderer_vulkan/vk_swapchain.h" |
| 17 | #include "video_core/renderer_vulkan/wrapper.h" | 16 | #include "video_core/vulkan_common/vulkan_device.h" |
| 17 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 18 | 18 | ||
| 19 | namespace Vulkan { | 19 | namespace Vulkan { |
| 20 | 20 | ||
| @@ -56,7 +56,7 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi | |||
| 56 | 56 | ||
| 57 | } // Anonymous namespace | 57 | } // Anonymous namespace |
| 58 | 58 | ||
| 59 | VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const VKDevice& device_, VKScheduler& scheduler_) | 59 | VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_) |
| 60 | : surface{surface_}, device{device_}, scheduler{scheduler_} {} | 60 | : surface{surface_}, device{device_}, scheduler{scheduler_} {} |
| 61 | 61 | ||
| 62 | VKSwapchain::~VKSwapchain() = default; | 62 | VKSwapchain::~VKSwapchain() = default; |
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 6b39befdf..2eadd62b3 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #include <vector> | 7 | #include <vector> |
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/renderer_vulkan/wrapper.h" | 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 11 | 11 | ||
| 12 | namespace Layout { | 12 | namespace Layout { |
| 13 | struct FramebufferLayout; | 13 | struct FramebufferLayout; |
| @@ -15,12 +15,12 @@ struct FramebufferLayout; | |||
| 15 | 15 | ||
| 16 | namespace Vulkan { | 16 | namespace Vulkan { |
| 17 | 17 | ||
| 18 | class VKDevice; | 18 | class Device; |
| 19 | class VKScheduler; | 19 | class VKScheduler; |
| 20 | 20 | ||
| 21 | class VKSwapchain { | 21 | class VKSwapchain { |
| 22 | public: | 22 | public: |
| 23 | explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device, VKScheduler& scheduler); | 23 | explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler); |
| 24 | ~VKSwapchain(); | 24 | ~VKSwapchain(); |
| 25 | 25 | ||
| 26 | /// Creates (or recreates) the swapchain with a given size. | 26 | /// Creates (or recreates) the swapchain with a given size. |
| @@ -73,7 +73,7 @@ private: | |||
| 73 | void Destroy(); | 73 | void Destroy(); |
| 74 | 74 | ||
| 75 | const VkSurfaceKHR surface; | 75 | const VkSurfaceKHR surface; |
| 76 | const VKDevice& device; | 76 | const Device& device; |
| 77 | VKScheduler& scheduler; | 77 | VKScheduler& scheduler; |
| 78 | 78 | ||
| 79 | vk::SwapchainKHR swapchain; | 79 | vk::SwapchainKHR swapchain; |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index ae2e3322c..bd11de012 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -4,614 +4,1105 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cstddef> | 7 | #include <span> |
| 8 | #include <cstring> | ||
| 9 | #include <memory> | ||
| 10 | #include <variant> | ||
| 11 | #include <vector> | 8 | #include <vector> |
| 12 | 9 | ||
| 13 | #include "common/assert.h" | 10 | #include "video_core/engines/fermi_2d.h" |
| 14 | #include "common/common_types.h" | 11 | #include "video_core/renderer_vulkan/blit_image.h" |
| 15 | #include "core/core.h" | ||
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 17 | #include "video_core/morton.h" | ||
| 18 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 12 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 19 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 20 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 13 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 21 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 14 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 22 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 23 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 16 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 24 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 17 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 25 | #include "video_core/renderer_vulkan/wrapper.h" | 18 | #include "video_core/vulkan_common/vulkan_device.h" |
| 26 | #include "video_core/surface.h" | 19 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 27 | 20 | ||
| 28 | namespace Vulkan { | 21 | namespace Vulkan { |
| 29 | 22 | ||
| 30 | using VideoCore::MortonSwizzle; | 23 | using Tegra::Engines::Fermi2D; |
| 31 | using VideoCore::MortonSwizzleMode; | ||
| 32 | |||
| 33 | using Tegra::Texture::SwizzleSource; | 24 | using Tegra::Texture::SwizzleSource; |
| 34 | using VideoCore::Surface::PixelFormat; | 25 | using Tegra::Texture::TextureMipmapFilter; |
| 35 | using VideoCore::Surface::SurfaceTarget; | 26 | using VideoCommon::BufferImageCopy; |
| 27 | using VideoCommon::ImageInfo; | ||
| 28 | using VideoCommon::ImageType; | ||
| 29 | using VideoCommon::SubresourceRange; | ||
| 30 | using VideoCore::Surface::IsPixelFormatASTC; | ||
| 36 | 31 | ||
| 37 | namespace { | 32 | namespace { |
| 38 | 33 | ||
| 39 | VkImageType SurfaceTargetToImage(SurfaceTarget target) { | 34 | constexpr std::array ATTACHMENT_REFERENCES{ |
| 40 | switch (target) { | 35 | VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, |
| 41 | case SurfaceTarget::Texture1D: | 36 | VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, |
| 42 | case SurfaceTarget::Texture1DArray: | 37 | VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, |
| 38 | VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 39 | VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 40 | VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 41 | VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 42 | VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 43 | VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 44 | }; | ||
| 45 | |||
| 46 | constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | ||
| 47 | if (color == std::array<float, 4>{0, 0, 0, 0}) { | ||
| 48 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; | ||
| 49 | } else if (color == std::array<float, 4>{0, 0, 0, 1}) { | ||
| 50 | return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; | ||
| 51 | } else if (color == std::array<float, 4>{1, 1, 1, 1}) { | ||
| 52 | return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; | ||
| 53 | } | ||
| 54 | if (color[0] + color[1] + color[2] > 1.35f) { | ||
| 55 | // If the color channels average brighter than roughly 0.45 (sum > 1.35), use a white border | ||
| 56 | return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; | ||
| 57 | } else if (color[3] > 0.5f) { | ||
| 58 | return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; | ||
| 59 | } else { | ||
| 60 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; | ||
| 61 | } | ||
| 62 | } | ||
| 63 | |||
| 64 | [[nodiscard]] VkImageType ConvertImageType(const ImageType type) { | ||
| 65 | switch (type) { | ||
| 66 | case ImageType::e1D: | ||
| 43 | return VK_IMAGE_TYPE_1D; | 67 | return VK_IMAGE_TYPE_1D; |
| 44 | case SurfaceTarget::Texture2D: | 68 | case ImageType::e2D: |
| 45 | case SurfaceTarget::Texture2DArray: | 69 | case ImageType::Linear: |
| 46 | case SurfaceTarget::TextureCubemap: | ||
| 47 | case SurfaceTarget::TextureCubeArray: | ||
| 48 | return VK_IMAGE_TYPE_2D; | 70 | return VK_IMAGE_TYPE_2D; |
| 49 | case SurfaceTarget::Texture3D: | 71 | case ImageType::e3D: |
| 50 | return VK_IMAGE_TYPE_3D; | 72 | return VK_IMAGE_TYPE_3D; |
| 51 | case SurfaceTarget::TextureBuffer: | 73 | case ImageType::Buffer: |
| 52 | UNREACHABLE(); | 74 | break; |
| 53 | return {}; | ||
| 54 | } | 75 | } |
| 55 | UNREACHABLE_MSG("Unknown texture target={}", target); | 76 | UNREACHABLE_MSG("Invalid image type={}", type); |
| 56 | return {}; | 77 | return {}; |
| 57 | } | 78 | } |
| 58 | 79 | ||
| 59 | VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { | 80 | [[nodiscard]] VkSampleCountFlagBits ConvertSampleCount(u32 num_samples) { |
| 60 | if (pixel_format < PixelFormat::MaxColorFormat) { | 81 | switch (num_samples) { |
| 61 | return VK_IMAGE_ASPECT_COLOR_BIT; | 82 | case 1: |
| 62 | } else if (pixel_format < PixelFormat::MaxDepthFormat) { | 83 | return VK_SAMPLE_COUNT_1_BIT; |
| 63 | return VK_IMAGE_ASPECT_DEPTH_BIT; | 84 | case 2: |
| 64 | } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { | 85 | return VK_SAMPLE_COUNT_2_BIT; |
| 65 | return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; | 86 | case 4: |
| 66 | } else { | 87 | return VK_SAMPLE_COUNT_4_BIT; |
| 67 | UNREACHABLE_MSG("Invalid pixel format={}", pixel_format); | 88 | case 8: |
| 68 | return VK_IMAGE_ASPECT_COLOR_BIT; | 89 | return VK_SAMPLE_COUNT_8_BIT; |
| 90 | case 16: | ||
| 91 | return VK_SAMPLE_COUNT_16_BIT; | ||
| 92 | default: | ||
| 93 | UNREACHABLE_MSG("Invalid number of samples={}", num_samples); | ||
| 94 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 69 | } | 95 | } |
| 70 | } | 96 | } |
| 71 | 97 | ||
| 72 | VkImageViewType GetImageViewType(SurfaceTarget target) { | 98 | [[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const Device& device, const ImageInfo& info) { |
| 73 | switch (target) { | 99 | const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, info.format); |
| 74 | case SurfaceTarget::Texture1D: | 100 | VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; |
| 75 | return VK_IMAGE_VIEW_TYPE_1D; | 101 | if (info.type == ImageType::e2D && info.resources.layers >= 6 && |
| 76 | case SurfaceTarget::Texture2D: | 102 | info.size.width == info.size.height) { |
| 77 | return VK_IMAGE_VIEW_TYPE_2D; | 103 | flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; |
| 78 | case SurfaceTarget::Texture3D: | ||
| 79 | return VK_IMAGE_VIEW_TYPE_3D; | ||
| 80 | case SurfaceTarget::Texture1DArray: | ||
| 81 | return VK_IMAGE_VIEW_TYPE_1D_ARRAY; | ||
| 82 | case SurfaceTarget::Texture2DArray: | ||
| 83 | return VK_IMAGE_VIEW_TYPE_2D_ARRAY; | ||
| 84 | case SurfaceTarget::TextureCubemap: | ||
| 85 | return VK_IMAGE_VIEW_TYPE_CUBE; | ||
| 86 | case SurfaceTarget::TextureCubeArray: | ||
| 87 | return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; | ||
| 88 | case SurfaceTarget::TextureBuffer: | ||
| 89 | break; | ||
| 90 | } | 104 | } |
| 91 | UNREACHABLE(); | 105 | if (info.type == ImageType::e3D) { |
| 92 | return {}; | 106 | flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; |
| 93 | } | 107 | } |
| 94 | 108 | VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | | |
| 95 | vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, | 109 | VK_IMAGE_USAGE_SAMPLED_BIT; |
| 96 | std::size_t host_memory_size) { | 110 | if (format_info.attachable) { |
| 97 | // TODO(Rodrigo): Move texture buffer creation to the buffer cache | 111 | switch (VideoCore::Surface::GetFormatType(info.format)) { |
| 98 | return device.GetLogical().CreateBuffer({ | 112 | case VideoCore::Surface::SurfaceType::ColorTexture: |
| 99 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 113 | usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; |
| 114 | break; | ||
| 115 | case VideoCore::Surface::SurfaceType::Depth: | ||
| 116 | case VideoCore::Surface::SurfaceType::DepthStencil: | ||
| 117 | usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; | ||
| 118 | break; | ||
| 119 | default: | ||
| 120 | UNREACHABLE_MSG("Invalid surface type"); | ||
| 121 | } | ||
| 122 | } | ||
| 123 | if (format_info.storage) { | ||
| 124 | usage |= VK_IMAGE_USAGE_STORAGE_BIT; | ||
| 125 | } | ||
| 126 | const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples); | ||
| 127 | return VkImageCreateInfo{ | ||
| 128 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | ||
| 100 | .pNext = nullptr, | 129 | .pNext = nullptr, |
| 101 | .flags = 0, | 130 | .flags = flags, |
| 102 | .size = static_cast<VkDeviceSize>(host_memory_size), | 131 | .imageType = ConvertImageType(info.type), |
| 103 | .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | | 132 | .format = format_info.format, |
| 104 | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | | 133 | .extent = |
| 105 | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | 134 | { |
| 135 | .width = info.size.width >> samples_x, | ||
| 136 | .height = info.size.height >> samples_y, | ||
| 137 | .depth = info.size.depth, | ||
| 138 | }, | ||
| 139 | .mipLevels = static_cast<u32>(info.resources.levels), | ||
| 140 | .arrayLayers = static_cast<u32>(info.resources.layers), | ||
| 141 | .samples = ConvertSampleCount(info.num_samples), | ||
| 142 | .tiling = VK_IMAGE_TILING_OPTIMAL, | ||
| 143 | .usage = usage, | ||
| 106 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 144 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 107 | .queueFamilyIndexCount = 0, | 145 | .queueFamilyIndexCount = 0, |
| 108 | .pQueueFamilyIndices = nullptr, | 146 | .pQueueFamilyIndices = nullptr, |
| 109 | }); | 147 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| 110 | } | ||
| 111 | |||
| 112 | VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, | ||
| 113 | const SurfaceParams& params, VkBuffer buffer, | ||
| 114 | std::size_t host_memory_size) { | ||
| 115 | ASSERT(params.IsBuffer()); | ||
| 116 | |||
| 117 | return { | ||
| 118 | .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, | ||
| 119 | .pNext = nullptr, | ||
| 120 | .flags = 0, | ||
| 121 | .buffer = buffer, | ||
| 122 | .format = | ||
| 123 | MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format, | ||
| 124 | .offset = 0, | ||
| 125 | .range = static_cast<VkDeviceSize>(host_memory_size), | ||
| 126 | }; | 148 | }; |
| 127 | } | 149 | } |
| 128 | 150 | ||
| 129 | VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { | 151 | [[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) { |
| 130 | ASSERT(!params.IsBuffer()); | 152 | if (info.type == ImageType::Buffer) { |
| 131 | 153 | return vk::Image{}; | |
| 132 | const auto [format, attachable, storage] = | 154 | } |
| 133 | MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); | 155 | return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); |
| 156 | } | ||
| 134 | 157 | ||
| 135 | VkImageCreateInfo ci{ | 158 | [[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) { |
| 136 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | 159 | if (info.type != ImageType::Buffer) { |
| 160 | return vk::Buffer{}; | ||
| 161 | } | ||
| 162 | const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); | ||
| 163 | return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 164 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 137 | .pNext = nullptr, | 165 | .pNext = nullptr, |
| 138 | .flags = 0, | 166 | .flags = 0, |
| 139 | .imageType = SurfaceTargetToImage(params.target), | 167 | .size = info.size.width * bytes_per_block, |
| 140 | .format = format, | 168 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |
| 141 | .extent = {}, | 169 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | |
| 142 | .mipLevels = params.num_levels, | 170 | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, |
| 143 | .arrayLayers = static_cast<u32>(params.GetNumLayers()), | ||
| 144 | .samples = VK_SAMPLE_COUNT_1_BIT, | ||
| 145 | .tiling = VK_IMAGE_TILING_OPTIMAL, | ||
| 146 | .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | | ||
| 147 | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, | ||
| 148 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 171 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 149 | .queueFamilyIndexCount = 0, | 172 | .queueFamilyIndexCount = 0, |
| 150 | .pQueueFamilyIndices = nullptr, | 173 | .pQueueFamilyIndices = nullptr, |
| 151 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | 174 | }); |
| 152 | }; | ||
| 153 | if (attachable) { | ||
| 154 | ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | ||
| 155 | : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; | ||
| 156 | } | ||
| 157 | if (storage) { | ||
| 158 | ci.usage |= VK_IMAGE_USAGE_STORAGE_BIT; | ||
| 159 | } | ||
| 160 | |||
| 161 | switch (params.target) { | ||
| 162 | case SurfaceTarget::TextureCubemap: | ||
| 163 | case SurfaceTarget::TextureCubeArray: | ||
| 164 | ci.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; | ||
| 165 | [[fallthrough]]; | ||
| 166 | case SurfaceTarget::Texture1D: | ||
| 167 | case SurfaceTarget::Texture1DArray: | ||
| 168 | case SurfaceTarget::Texture2D: | ||
| 169 | case SurfaceTarget::Texture2DArray: | ||
| 170 | ci.extent = {params.width, params.height, 1}; | ||
| 171 | break; | ||
| 172 | case SurfaceTarget::Texture3D: | ||
| 173 | ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; | ||
| 174 | ci.extent = {params.width, params.height, params.depth}; | ||
| 175 | break; | ||
| 176 | case SurfaceTarget::TextureBuffer: | ||
| 177 | UNREACHABLE(); | ||
| 178 | } | ||
| 179 | |||
| 180 | return ci; | ||
| 181 | } | 175 | } |
| 182 | 176 | ||
| 183 | u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, | 177 | [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { |
| 184 | SwizzleSource w_source) { | 178 | switch (VideoCore::Surface::GetFormatType(format)) { |
| 185 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | 179 | case VideoCore::Surface::SurfaceType::ColorTexture: |
| 186 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | 180 | return VK_IMAGE_ASPECT_COLOR_BIT; |
| 181 | case VideoCore::Surface::SurfaceType::Depth: | ||
| 182 | return VK_IMAGE_ASPECT_DEPTH_BIT; | ||
| 183 | case VideoCore::Surface::SurfaceType::DepthStencil: | ||
| 184 | return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; | ||
| 185 | default: | ||
| 186 | UNREACHABLE_MSG("Invalid surface type"); | ||
| 187 | return VkImageAspectFlags{}; | ||
| 188 | } | ||
| 187 | } | 189 | } |
| 188 | 190 | ||
| 189 | } // Anonymous namespace | 191 | [[nodiscard]] VkImageAspectFlags ImageViewAspectMask(const VideoCommon::ImageViewInfo& info) { |
| 190 | 192 | if (info.IsRenderTarget()) { | |
| 191 | CachedSurface::CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, | 193 | return ImageAspectMask(info.format); |
| 192 | VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, | ||
| 193 | GPUVAddr gpu_addr_, const SurfaceParams& params_) | ||
| 194 | : SurfaceBase<View>{gpu_addr_, params_, device_.IsOptimalAstcSupported()}, device{device_}, | ||
| 195 | memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{staging_pool_} { | ||
| 196 | if (params.IsBuffer()) { | ||
| 197 | buffer = CreateBuffer(device, params, host_memory_size); | ||
| 198 | commit = memory_manager.Commit(buffer, false); | ||
| 199 | |||
| 200 | const auto buffer_view_ci = | ||
| 201 | GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size); | ||
| 202 | format = buffer_view_ci.format; | ||
| 203 | |||
| 204 | buffer_view = device.GetLogical().CreateBufferView(buffer_view_ci); | ||
| 205 | } else { | ||
| 206 | const auto image_ci = GenerateImageCreateInfo(device, params); | ||
| 207 | format = image_ci.format; | ||
| 208 | |||
| 209 | image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format)); | ||
| 210 | commit = memory_manager.Commit(image->GetHandle(), false); | ||
| 211 | } | 194 | } |
| 212 | 195 | const bool is_first = info.Swizzle()[0] == SwizzleSource::R; | |
| 213 | // TODO(Rodrigo): Move this to a virtual function. | 196 | switch (info.format) { |
| 214 | u32 num_layers = 1; | 197 | case PixelFormat::D24_UNORM_S8_UINT: |
| 215 | if (params.is_layered || params.target == SurfaceTarget::Texture3D) { | 198 | case PixelFormat::D32_FLOAT_S8_UINT: |
| 216 | num_layers = params.depth; | 199 | return is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; |
| 200 | case PixelFormat::S8_UINT_D24_UNORM: | ||
| 201 | return is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; | ||
| 202 | case PixelFormat::D16_UNORM: | ||
| 203 | case PixelFormat::D32_FLOAT: | ||
| 204 | return VK_IMAGE_ASPECT_DEPTH_BIT; | ||
| 205 | default: | ||
| 206 | return VK_IMAGE_ASPECT_COLOR_BIT; | ||
| 217 | } | 207 | } |
| 218 | main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels)); | ||
| 219 | } | 208 | } |
| 220 | 209 | ||
| 221 | CachedSurface::~CachedSurface() = default; | 210 | [[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device, |
| 222 | 211 | const ImageView* image_view) { | |
| 223 | void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { | 212 | const auto pixel_format = image_view->format; |
| 224 | // To upload data we have to be outside of a renderpass | 213 | return VkAttachmentDescription{ |
| 225 | scheduler.RequestOutsideRenderPassOperationContext(); | 214 | .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, |
| 215 | .format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format).format, | ||
| 216 | .samples = image_view->Samples(), | ||
| 217 | .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 218 | .storeOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 219 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 220 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 221 | .initialLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 222 | .finalLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 223 | }; | ||
| 224 | } | ||
| 226 | 225 | ||
| 227 | if (params.IsBuffer()) { | 226 | [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { |
| 228 | UploadBuffer(staging_buffer); | 227 | switch (swizzle) { |
| 229 | } else { | 228 | case SwizzleSource::Zero: |
| 230 | UploadImage(staging_buffer); | 229 | return VK_COMPONENT_SWIZZLE_ZERO; |
| 230 | case SwizzleSource::R: | ||
| 231 | return VK_COMPONENT_SWIZZLE_R; | ||
| 232 | case SwizzleSource::G: | ||
| 233 | return VK_COMPONENT_SWIZZLE_G; | ||
| 234 | case SwizzleSource::B: | ||
| 235 | return VK_COMPONENT_SWIZZLE_B; | ||
| 236 | case SwizzleSource::A: | ||
| 237 | return VK_COMPONENT_SWIZZLE_A; | ||
| 238 | case SwizzleSource::OneFloat: | ||
| 239 | case SwizzleSource::OneInt: | ||
| 240 | return VK_COMPONENT_SWIZZLE_ONE; | ||
| 231 | } | 241 | } |
| 242 | UNREACHABLE_MSG("Invalid swizzle={}", swizzle); | ||
| 243 | return VK_COMPONENT_SWIZZLE_ZERO; | ||
| 232 | } | 244 | } |
| 233 | 245 | ||
| 234 | void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { | 246 | [[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { |
| 235 | UNIMPLEMENTED_IF(params.IsBuffer()); | 247 | switch (type) { |
| 236 | 248 | case VideoCommon::ImageViewType::e1D: | |
| 237 | if (params.pixel_format == PixelFormat::A1B5G5R5_UNORM) { | 249 | return VK_IMAGE_VIEW_TYPE_1D; |
| 238 | LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); | 250 | case VideoCommon::ImageViewType::e2D: |
| 251 | return VK_IMAGE_VIEW_TYPE_2D; | ||
| 252 | case VideoCommon::ImageViewType::Cube: | ||
| 253 | return VK_IMAGE_VIEW_TYPE_CUBE; | ||
| 254 | case VideoCommon::ImageViewType::e3D: | ||
| 255 | return VK_IMAGE_VIEW_TYPE_3D; | ||
| 256 | case VideoCommon::ImageViewType::e1DArray: | ||
| 257 | return VK_IMAGE_VIEW_TYPE_1D_ARRAY; | ||
| 258 | case VideoCommon::ImageViewType::e2DArray: | ||
| 259 | return VK_IMAGE_VIEW_TYPE_2D_ARRAY; | ||
| 260 | case VideoCommon::ImageViewType::CubeArray: | ||
| 261 | return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; | ||
| 262 | case VideoCommon::ImageViewType::Rect: | ||
| 263 | LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); | ||
| 264 | return VK_IMAGE_VIEW_TYPE_2D; | ||
| 265 | case VideoCommon::ImageViewType::Buffer: | ||
| 266 | UNREACHABLE_MSG("Texture buffers can't be image views"); | ||
| 267 | return VK_IMAGE_VIEW_TYPE_1D; | ||
| 239 | } | 268 | } |
| 269 | UNREACHABLE_MSG("Invalid image view type={}", type); | ||
| 270 | return VK_IMAGE_VIEW_TYPE_2D; | ||
| 271 | } | ||
| 240 | 272 | ||
| 241 | // We can't copy images to buffers inside a renderpass | 273 | [[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers( |
| 242 | scheduler.RequestOutsideRenderPassOperationContext(); | 274 | VideoCommon::SubresourceLayers subresource, VkImageAspectFlags aspect_mask) { |
| 275 | return VkImageSubresourceLayers{ | ||
| 276 | .aspectMask = aspect_mask, | ||
| 277 | .mipLevel = static_cast<u32>(subresource.base_level), | ||
| 278 | .baseArrayLayer = static_cast<u32>(subresource.base_layer), | ||
| 279 | .layerCount = static_cast<u32>(subresource.num_layers), | ||
| 280 | }; | ||
| 281 | } | ||
| 243 | 282 | ||
| 244 | FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, | 283 | [[nodiscard]] VkOffset3D MakeOffset3D(VideoCommon::Offset3D offset3d) { |
| 245 | VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); | 284 | return VkOffset3D{ |
| 285 | .x = offset3d.x, | ||
| 286 | .y = offset3d.y, | ||
| 287 | .z = offset3d.z, | ||
| 288 | }; | ||
| 289 | } | ||
| 246 | 290 | ||
| 247 | const auto& unused_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); | 291 | [[nodiscard]] VkExtent3D MakeExtent3D(VideoCommon::Extent3D extent3d) { |
| 248 | // TODO(Rodrigo): Do this in a single copy | 292 | return VkExtent3D{ |
| 249 | for (u32 level = 0; level < params.num_levels; ++level) { | 293 | .width = static_cast<u32>(extent3d.width), |
| 250 | scheduler.Record([image = *image->GetHandle(), buffer = *unused_buffer.handle, | 294 | .height = static_cast<u32>(extent3d.height), |
| 251 | copy = GetBufferImageCopy(level)](vk::CommandBuffer cmdbuf) { | 295 | .depth = static_cast<u32>(extent3d.depth), |
| 252 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy); | 296 | }; |
| 253 | }); | 297 | } |
| 254 | } | ||
| 255 | scheduler.Finish(); | ||
| 256 | 298 | ||
| 257 | // TODO(Rodrigo): Use an internal buffer for staging buffers and avoid this unnecessary memcpy. | 299 | [[nodiscard]] VkImageCopy MakeImageCopy(const VideoCommon::ImageCopy& copy, |
| 258 | std::memcpy(staging_buffer.data(), unused_buffer.commit->Map(host_memory_size), | 300 | VkImageAspectFlags aspect_mask) noexcept { |
| 259 | host_memory_size); | 301 | return VkImageCopy{ |
| 302 | .srcSubresource = MakeImageSubresourceLayers(copy.src_subresource, aspect_mask), | ||
| 303 | .srcOffset = MakeOffset3D(copy.src_offset), | ||
| 304 | .dstSubresource = MakeImageSubresourceLayers(copy.dst_subresource, aspect_mask), | ||
| 305 | .dstOffset = MakeOffset3D(copy.dst_offset), | ||
| 306 | .extent = MakeExtent3D(copy.extent), | ||
| 307 | }; | ||
| 260 | } | 308 | } |
| 261 | 309 | ||
| 262 | void CachedSurface::DecorateSurfaceName() { | 310 | [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( |
| 263 | // TODO(Rodrigo): Add name decorations | 311 | std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { |
| 312 | std::vector<VkBufferCopy> result(copies.size()); | ||
| 313 | std::ranges::transform( | ||
| 314 | copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { | ||
| 315 | return VkBufferCopy{ | ||
| 316 | .srcOffset = static_cast<VkDeviceSize>(copy.src_offset + buffer_offset), | ||
| 317 | .dstOffset = static_cast<VkDeviceSize>(copy.dst_offset), | ||
| 318 | .size = static_cast<VkDeviceSize>(copy.size), | ||
| 319 | }; | ||
| 320 | }); | ||
| 321 | return result; | ||
| 264 | } | 322 | } |
| 265 | 323 | ||
| 266 | View CachedSurface::CreateView(const ViewParams& view_params) { | 324 | [[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies( |
| 267 | // TODO(Rodrigo): Add name decorations | 325 | std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { |
| 268 | return views[view_params] = std::make_shared<CachedSurfaceView>(device, *this, view_params); | 326 | struct Maker { |
| 327 | VkBufferImageCopy operator()(const BufferImageCopy& copy) const { | ||
| 328 | return VkBufferImageCopy{ | ||
| 329 | .bufferOffset = copy.buffer_offset + buffer_offset, | ||
| 330 | .bufferRowLength = copy.buffer_row_length, | ||
| 331 | .bufferImageHeight = copy.buffer_image_height, | ||
| 332 | .imageSubresource = | ||
| 333 | { | ||
| 334 | .aspectMask = aspect_mask, | ||
| 335 | .mipLevel = static_cast<u32>(copy.image_subresource.base_level), | ||
| 336 | .baseArrayLayer = static_cast<u32>(copy.image_subresource.base_layer), | ||
| 337 | .layerCount = static_cast<u32>(copy.image_subresource.num_layers), | ||
| 338 | }, | ||
| 339 | .imageOffset = | ||
| 340 | { | ||
| 341 | .x = copy.image_offset.x, | ||
| 342 | .y = copy.image_offset.y, | ||
| 343 | .z = copy.image_offset.z, | ||
| 344 | }, | ||
| 345 | .imageExtent = | ||
| 346 | { | ||
| 347 | .width = copy.image_extent.width, | ||
| 348 | .height = copy.image_extent.height, | ||
| 349 | .depth = copy.image_extent.depth, | ||
| 350 | }, | ||
| 351 | }; | ||
| 352 | } | ||
| 353 | size_t buffer_offset; | ||
| 354 | VkImageAspectFlags aspect_mask; | ||
| 355 | }; | ||
| 356 | if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { | ||
| 357 | std::vector<VkBufferImageCopy> result(copies.size() * 2); | ||
| 358 | std::ranges::transform(copies, result.begin(), | ||
| 359 | Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); | ||
| 360 | std::ranges::transform(copies, result.begin() + copies.size(), | ||
| 361 | Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); | ||
| 362 | return result; | ||
| 363 | } else { | ||
| 364 | std::vector<VkBufferImageCopy> result(copies.size()); | ||
| 365 | std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); | ||
| 366 | return result; | ||
| 367 | } | ||
| 269 | } | 368 | } |
| 270 | 369 | ||
| 271 | void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { | 370 | [[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(VkImageAspectFlags aspect_mask, |
| 272 | const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); | 371 | const SubresourceRange& range) { |
| 273 | std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); | 372 | return VkImageSubresourceRange{ |
| 373 | .aspectMask = aspect_mask, | ||
| 374 | .baseMipLevel = static_cast<u32>(range.base.level), | ||
| 375 | .levelCount = static_cast<u32>(range.extent.levels), | ||
| 376 | .baseArrayLayer = static_cast<u32>(range.base.layer), | ||
| 377 | .layerCount = static_cast<u32>(range.extent.layers), | ||
| 378 | }; | ||
| 379 | } | ||
| 274 | 380 | ||
| 275 | scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, | 381 | [[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(const ImageView* image_view) { |
| 276 | size = host_memory_size](vk::CommandBuffer cmdbuf) { | 382 | SubresourceRange range = image_view->range; |
| 277 | VkBufferCopy copy; | 383 | if (True(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { |
| 278 | copy.srcOffset = 0; | 384 | // Slice image views always affect a single layer, but their subresource range corresponds |
| 279 | copy.dstOffset = 0; | 385 | // to the slice. Override the value to affect a single layer. |
| 280 | copy.size = size; | 386 | range.base.layer = 0; |
| 281 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); | 387 | range.extent.layers = 1; |
| 388 | } | ||
| 389 | return MakeSubresourceRange(ImageAspectMask(image_view->format), range); | ||
| 390 | } | ||
| 282 | 391 | ||
| 283 | VkBufferMemoryBarrier barrier; | 392 | [[nodiscard]] VkImageSubresourceLayers MakeSubresourceLayers(const ImageView* image_view) { |
| 284 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | 393 | return VkImageSubresourceLayers{ |
| 285 | barrier.pNext = nullptr; | 394 | .aspectMask = ImageAspectMask(image_view->format), |
| 286 | barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | 395 | .mipLevel = static_cast<u32>(image_view->range.base.level), |
| 287 | barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; | 396 | .baseArrayLayer = static_cast<u32>(image_view->range.base.layer), |
| 288 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // They'll be ignored anyway | 397 | .layerCount = static_cast<u32>(image_view->range.extent.layers), |
| 289 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | 398 | }; |
| 290 | barrier.buffer = dst_buffer; | ||
| 291 | barrier.offset = 0; | ||
| 292 | barrier.size = size; | ||
| 293 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, | ||
| 294 | 0, {}, barrier, {}); | ||
| 295 | }); | ||
| 296 | } | 399 | } |
| 297 | 400 | ||
| 298 | void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { | 401 | [[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) { |
| 299 | const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); | 402 | switch (value) { |
| 300 | std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); | 403 | case SwizzleSource::G: |
| 301 | 404 | return SwizzleSource::R; | |
| 302 | FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, | 405 | default: |
| 303 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); | 406 | return value; |
| 304 | |||
| 305 | for (u32 level = 0; level < params.num_levels; ++level) { | ||
| 306 | const VkBufferImageCopy copy = GetBufferImageCopy(level); | ||
| 307 | if (image->GetAspectMask() == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { | ||
| 308 | scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), | ||
| 309 | copy](vk::CommandBuffer cmdbuf) { | ||
| 310 | std::array<VkBufferImageCopy, 2> copies = {copy, copy}; | ||
| 311 | copies[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; | ||
| 312 | copies[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; | ||
| 313 | cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 314 | copies); | ||
| 315 | }); | ||
| 316 | } else { | ||
| 317 | scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), | ||
| 318 | copy](vk::CommandBuffer cmdbuf) { | ||
| 319 | cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); | ||
| 320 | }); | ||
| 321 | } | ||
| 322 | } | 407 | } |
| 323 | } | 408 | } |
| 324 | 409 | ||
| 325 | VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { | 410 | void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, |
| 326 | return { | 411 | VkImageAspectFlags aspect_mask, bool is_initialized, |
| 327 | .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted), | 412 | std::span<const VkBufferImageCopy> copies) { |
| 328 | .bufferRowLength = 0, | 413 | static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | |
| 329 | .bufferImageHeight = 0, | 414 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | |
| 330 | .imageSubresource = | 415 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; |
| 416 | const VkImageMemoryBarrier read_barrier{ | ||
| 417 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 418 | .pNext = nullptr, | ||
| 419 | .srcAccessMask = ACCESS_FLAGS, | ||
| 420 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 421 | .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, | ||
| 422 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 423 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 424 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 425 | .image = image, | ||
| 426 | .subresourceRange = | ||
| 331 | { | 427 | { |
| 332 | .aspectMask = image->GetAspectMask(), | 428 | .aspectMask = aspect_mask, |
| 333 | .mipLevel = level, | 429 | .baseMipLevel = 0, |
| 430 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 334 | .baseArrayLayer = 0, | 431 | .baseArrayLayer = 0, |
| 335 | .layerCount = static_cast<u32>(params.GetNumLayers()), | 432 | .layerCount = VK_REMAINING_ARRAY_LAYERS, |
| 336 | }, | 433 | }, |
| 337 | .imageOffset = {.x = 0, .y = 0, .z = 0}, | 434 | }; |
| 338 | .imageExtent = | 435 | const VkImageMemoryBarrier write_barrier{ |
| 436 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 437 | .pNext = nullptr, | ||
| 438 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 439 | .dstAccessMask = ACCESS_FLAGS, | ||
| 440 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 441 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 442 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 443 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 444 | .image = image, | ||
| 445 | .subresourceRange = | ||
| 339 | { | 446 | { |
| 340 | .width = params.GetMipWidth(level), | 447 | .aspectMask = aspect_mask, |
| 341 | .height = params.GetMipHeight(level), | 448 | .baseMipLevel = 0, |
| 342 | .depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1U, | 449 | .levelCount = VK_REMAINING_MIP_LEVELS, |
| 450 | .baseArrayLayer = 0, | ||
| 451 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 343 | }, | 452 | }, |
| 344 | }; | 453 | }; |
| 454 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, | ||
| 455 | read_barrier); | ||
| 456 | cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies); | ||
| 457 | // TODO: Move this to another API | ||
| 458 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, | ||
| 459 | write_barrier); | ||
| 345 | } | 460 | } |
| 346 | 461 | ||
| 347 | VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { | 462 | [[nodiscard]] VkImageBlit MakeImageBlit(const std::array<Offset2D, 2>& dst_region, |
| 348 | return {image->GetAspectMask(), 0, params.num_levels, 0, | 463 | const std::array<Offset2D, 2>& src_region, |
| 349 | static_cast<u32>(params.GetNumLayers())}; | 464 | const VkImageSubresourceLayers& dst_layers, |
| 465 | const VkImageSubresourceLayers& src_layers) { | ||
| 466 | return VkImageBlit{ | ||
| 467 | .srcSubresource = src_layers, | ||
| 468 | .srcOffsets = | ||
| 469 | { | ||
| 470 | { | ||
| 471 | .x = src_region[0].x, | ||
| 472 | .y = src_region[0].y, | ||
| 473 | .z = 0, | ||
| 474 | }, | ||
| 475 | { | ||
| 476 | .x = src_region[1].x, | ||
| 477 | .y = src_region[1].y, | ||
| 478 | .z = 1, | ||
| 479 | }, | ||
| 480 | }, | ||
| 481 | .dstSubresource = dst_layers, | ||
| 482 | .dstOffsets = | ||
| 483 | { | ||
| 484 | { | ||
| 485 | .x = dst_region[0].x, | ||
| 486 | .y = dst_region[0].y, | ||
| 487 | .z = 0, | ||
| 488 | }, | ||
| 489 | { | ||
| 490 | .x = dst_region[1].x, | ||
| 491 | .y = dst_region[1].y, | ||
| 492 | .z = 1, | ||
| 493 | }, | ||
| 494 | }, | ||
| 495 | }; | ||
| 350 | } | 496 | } |
| 351 | 497 | ||
| 352 | CachedSurfaceView::CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_, | 498 | [[nodiscard]] VkImageResolve MakeImageResolve(const std::array<Offset2D, 2>& dst_region, |
| 353 | const ViewParams& view_params_) | 499 | const std::array<Offset2D, 2>& src_region, |
| 354 | : ViewBase{view_params_}, surface_params{surface_.GetSurfaceParams()}, | 500 | const VkImageSubresourceLayers& dst_layers, |
| 355 | image{surface_.GetImageHandle()}, buffer_view{surface_.GetBufferViewHandle()}, | 501 | const VkImageSubresourceLayers& src_layers) { |
| 356 | aspect_mask{surface_.GetAspectMask()}, device{device_}, surface{surface_}, | 502 | return VkImageResolve{ |
| 357 | base_level{view_params_.base_level}, num_levels{view_params_.num_levels}, | 503 | .srcSubresource = src_layers, |
| 358 | image_view_type{image ? GetImageViewType(view_params_.target) : VK_IMAGE_VIEW_TYPE_1D} { | 504 | .srcOffset = |
| 359 | if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { | 505 | { |
| 360 | base_layer = 0; | 506 | .x = src_region[0].x, |
| 361 | num_layers = 1; | 507 | .y = src_region[0].y, |
| 362 | base_slice = view_params_.base_layer; | 508 | .z = 0, |
| 363 | num_slices = view_params_.num_layers; | 509 | }, |
| 364 | } else { | 510 | .dstSubresource = dst_layers, |
| 365 | base_layer = view_params_.base_layer; | 511 | .dstOffset = |
| 366 | num_layers = view_params_.num_layers; | 512 | { |
| 367 | } | 513 | .x = dst_region[0].x, |
| 514 | .y = dst_region[0].y, | ||
| 515 | .z = 0, | ||
| 516 | }, | ||
| 517 | .extent = | ||
| 518 | { | ||
| 519 | .width = static_cast<u32>(dst_region[1].x - dst_region[0].x), | ||
| 520 | .height = static_cast<u32>(dst_region[1].y - dst_region[0].y), | ||
| 521 | .depth = 1, | ||
| 522 | }, | ||
| 523 | }; | ||
| 368 | } | 524 | } |
| 369 | 525 | ||
| 370 | CachedSurfaceView::~CachedSurfaceView() = default; | 526 | struct RangedBarrierRange { |
| 371 | 527 | u32 min_mip = std::numeric_limits<u32>::max(); | |
| 372 | VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source, | 528 | u32 max_mip = std::numeric_limits<u32>::min(); |
| 373 | SwizzleSource z_source, SwizzleSource w_source) { | 529 | u32 min_layer = std::numeric_limits<u32>::max(); |
| 374 | const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); | 530 | u32 max_layer = std::numeric_limits<u32>::min(); |
| 375 | if (last_image_view && last_swizzle == new_swizzle) { | 531 | |
| 376 | return last_image_view; | 532 | void AddLayers(const VkImageSubresourceLayers& layers) { |
| 533 | min_mip = std::min(min_mip, layers.mipLevel); | ||
| 534 | max_mip = std::max(max_mip, layers.mipLevel + 1); | ||
| 535 | min_layer = std::min(min_layer, layers.baseArrayLayer); | ||
| 536 | max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount); | ||
| 377 | } | 537 | } |
| 378 | last_swizzle = new_swizzle; | ||
| 379 | 538 | ||
| 380 | const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); | 539 | VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept { |
| 381 | auto& image_view = entry->second; | 540 | return VkImageSubresourceRange{ |
| 382 | if (!is_cache_miss) { | 541 | .aspectMask = aspect_mask, |
| 383 | return last_image_view = *image_view; | 542 | .baseMipLevel = min_mip, |
| 543 | .levelCount = max_mip - min_mip, | ||
| 544 | .baseArrayLayer = min_layer, | ||
| 545 | .layerCount = max_layer - min_layer, | ||
| 546 | }; | ||
| 384 | } | 547 | } |
| 548 | }; | ||
| 385 | 549 | ||
| 386 | std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), | 550 | } // Anonymous namespace |
| 387 | MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)}; | ||
| 388 | if (surface_params.pixel_format == PixelFormat::A1B5G5R5_UNORM) { | ||
| 389 | // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. | ||
| 390 | std::swap(swizzle[0], swizzle[2]); | ||
| 391 | } | ||
| 392 | 551 | ||
| 393 | // Games can sample depth or stencil values on textures. This is decided by the swizzle value on | 552 | void TextureCacheRuntime::Finish() { |
| 394 | // hardware. To emulate this on Vulkan we specify it in the aspect. | 553 | scheduler.Finish(); |
| 395 | VkImageAspectFlags aspect = aspect_mask; | 554 | } |
| 396 | if (aspect == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { | ||
| 397 | UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); | ||
| 398 | const bool is_first = x_source == SwizzleSource::R; | ||
| 399 | switch (surface_params.pixel_format) { | ||
| 400 | case PixelFormat::D24_UNORM_S8_UINT: | ||
| 401 | case PixelFormat::D32_FLOAT_S8_UINT: | ||
| 402 | aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; | ||
| 403 | break; | ||
| 404 | case PixelFormat::S8_UINT_D24_UNORM: | ||
| 405 | aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; | ||
| 406 | break; | ||
| 407 | default: | ||
| 408 | aspect = VK_IMAGE_ASPECT_DEPTH_BIT; | ||
| 409 | UNIMPLEMENTED(); | ||
| 410 | } | ||
| 411 | 555 | ||
| 412 | // Make sure we sample the first component | 556 | ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { |
| 413 | std::transform( | 557 | const auto& buffer = staging_buffer_pool.GetUnusedBuffer(size, true); |
| 414 | swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) { | 558 | return ImageBufferMap{ |
| 415 | return component == VK_COMPONENT_SWIZZLE_G ? VK_COMPONENT_SWIZZLE_R : component; | 559 | .handle = *buffer.handle, |
| 416 | }); | 560 | .map = buffer.commit->Map(size), |
| 417 | } | 561 | }; |
| 562 | } | ||
| 418 | 563 | ||
| 419 | if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { | 564 | void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| 420 | ASSERT(base_slice == 0); | 565 | const std::array<Offset2D, 2>& dst_region, |
| 421 | ASSERT(num_slices == surface_params.depth); | 566 | const std::array<Offset2D, 2>& src_region, |
| 567 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 568 | Tegra::Engines::Fermi2D::Operation operation) { | ||
| 569 | const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format); | ||
| 570 | const bool is_dst_msaa = dst.Samples() != VK_SAMPLE_COUNT_1_BIT; | ||
| 571 | const bool is_src_msaa = src.Samples() != VK_SAMPLE_COUNT_1_BIT; | ||
| 572 | ASSERT(aspect_mask == ImageAspectMask(dst.format)); | ||
| 573 | if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) { | ||
| 574 | blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter, | ||
| 575 | operation); | ||
| 576 | return; | ||
| 422 | } | 577 | } |
| 423 | 578 | if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { | |
| 424 | image_view = device.GetLogical().CreateImageView({ | 579 | if (!device.IsBlitDepthStencilSupported()) { |
| 425 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | 580 | UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa); |
| 426 | .pNext = nullptr, | 581 | blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(), |
| 427 | .flags = 0, | 582 | dst_region, src_region, filter, operation); |
| 428 | .image = surface.GetImageHandle(), | 583 | return; |
| 429 | .viewType = image_view_type, | 584 | } |
| 430 | .format = surface.GetImage().GetFormat(), | 585 | } |
| 431 | .components = | 586 | ASSERT(src.ImageFormat() == dst.ImageFormat()); |
| 432 | { | 587 | ASSERT(!(is_dst_msaa && !is_src_msaa)); |
| 433 | .r = swizzle[0], | 588 | ASSERT(operation == Fermi2D::Operation::SrcCopy); |
| 434 | .g = swizzle[1], | 589 | |
| 435 | .b = swizzle[2], | 590 | const VkImage dst_image = dst.ImageHandle(); |
| 436 | .a = swizzle[3], | 591 | const VkImage src_image = src.ImageHandle(); |
| 592 | const VkImageSubresourceLayers dst_layers = MakeSubresourceLayers(&dst); | ||
| 593 | const VkImageSubresourceLayers src_layers = MakeSubresourceLayers(&src); | ||
| 594 | const bool is_resolve = is_src_msaa && !is_dst_msaa; | ||
| 595 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 596 | scheduler.Record([filter, dst_region, src_region, dst_image, src_image, dst_layers, src_layers, | ||
| 597 | aspect_mask, is_resolve](vk::CommandBuffer cmdbuf) { | ||
| 598 | const std::array read_barriers{ | ||
| 599 | VkImageMemoryBarrier{ | ||
| 600 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 601 | .pNext = nullptr, | ||
| 602 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | | ||
| 603 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||
| 604 | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 605 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 606 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 607 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 608 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 609 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 610 | .image = src_image, | ||
| 611 | .subresourceRange{ | ||
| 612 | .aspectMask = aspect_mask, | ||
| 613 | .baseMipLevel = 0, | ||
| 614 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 615 | .baseArrayLayer = 0, | ||
| 616 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 617 | }, | ||
| 437 | }, | 618 | }, |
| 438 | .subresourceRange = | 619 | VkImageMemoryBarrier{ |
| 439 | { | 620 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 440 | .aspectMask = aspect, | 621 | .pNext = nullptr, |
| 441 | .baseMipLevel = base_level, | 622 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | |
| 442 | .levelCount = num_levels, | 623 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | |
| 443 | .baseArrayLayer = base_layer, | 624 | VK_ACCESS_TRANSFER_WRITE_BIT, |
| 444 | .layerCount = num_layers, | 625 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 626 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 627 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 628 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 629 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 630 | .image = dst_image, | ||
| 631 | .subresourceRange{ | ||
| 632 | .aspectMask = aspect_mask, | ||
| 633 | .baseMipLevel = 0, | ||
| 634 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 635 | .baseArrayLayer = 0, | ||
| 636 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 637 | }, | ||
| 638 | }, | ||
| 639 | }; | ||
| 640 | VkImageMemoryBarrier write_barrier{ | ||
| 641 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 642 | .pNext = nullptr, | ||
| 643 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 644 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | | ||
| 645 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||
| 646 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||
| 647 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 648 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 649 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 650 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 651 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 652 | .image = dst_image, | ||
| 653 | .subresourceRange{ | ||
| 654 | .aspectMask = aspect_mask, | ||
| 655 | .baseMipLevel = 0, | ||
| 656 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 657 | .baseArrayLayer = 0, | ||
| 658 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 445 | }, | 659 | }, |
| 660 | }; | ||
| 661 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 662 | 0, nullptr, nullptr, read_barriers); | ||
| 663 | if (is_resolve) { | ||
| 664 | cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, | ||
| 665 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 666 | MakeImageResolve(dst_region, src_region, dst_layers, src_layers)); | ||
| 667 | } else { | ||
| 668 | const bool is_linear = filter == Fermi2D::Filter::Bilinear; | ||
| 669 | const VkFilter vk_filter = is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; | ||
| 670 | cmdbuf.BlitImage( | ||
| 671 | src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 672 | MakeImageBlit(dst_region, src_region, dst_layers, src_layers), vk_filter); | ||
| 673 | } | ||
| 674 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 675 | 0, write_barrier); | ||
| 446 | }); | 676 | }); |
| 447 | |||
| 448 | return last_image_view = *image_view; | ||
| 449 | } | 677 | } |
| 450 | 678 | ||
| 451 | VkImageView CachedSurfaceView::GetAttachment() { | 679 | void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { |
| 452 | if (render_target) { | 680 | switch (dst_view.format) { |
| 453 | return *render_target; | 681 | case PixelFormat::R16_UNORM: |
| 682 | if (src_view.format == PixelFormat::D16_UNORM) { | ||
| 683 | return blit_image_helper.ConvertD16ToR16(dst, src_view); | ||
| 684 | } | ||
| 685 | break; | ||
| 686 | case PixelFormat::R32_FLOAT: | ||
| 687 | if (src_view.format == PixelFormat::D32_FLOAT) { | ||
| 688 | return blit_image_helper.ConvertD32ToR32(dst, src_view); | ||
| 689 | } | ||
| 690 | break; | ||
| 691 | case PixelFormat::D16_UNORM: | ||
| 692 | if (src_view.format == PixelFormat::R16_UNORM) { | ||
| 693 | return blit_image_helper.ConvertR16ToD16(dst, src_view); | ||
| 694 | } | ||
| 695 | break; | ||
| 696 | case PixelFormat::D32_FLOAT: | ||
| 697 | if (src_view.format == PixelFormat::R32_FLOAT) { | ||
| 698 | return blit_image_helper.ConvertR32ToD32(dst, src_view); | ||
| 699 | } | ||
| 700 | break; | ||
| 701 | default: | ||
| 702 | break; | ||
| 454 | } | 703 | } |
| 704 | UNIMPLEMENTED_MSG("Unimplemented format copy from {} to {}", src_view.format, dst_view.format); | ||
| 705 | } | ||
| 455 | 706 | ||
| 456 | VkImageViewCreateInfo ci{ | 707 | void TextureCacheRuntime::CopyImage(Image& dst, Image& src, |
| 457 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | 708 | std::span<const VideoCommon::ImageCopy> copies) { |
| 458 | .pNext = nullptr, | 709 | std::vector<VkImageCopy> vk_copies(copies.size()); |
| 459 | .flags = 0, | 710 | const VkImageAspectFlags aspect_mask = dst.AspectMask(); |
| 460 | .image = surface.GetImageHandle(), | 711 | ASSERT(aspect_mask == src.AspectMask()); |
| 461 | .viewType = VK_IMAGE_VIEW_TYPE_1D, | 712 | |
| 462 | .format = surface.GetImage().GetFormat(), | 713 | std::ranges::transform(copies, vk_copies.begin(), [aspect_mask](const auto& copy) { |
| 463 | .components = | 714 | return MakeImageCopy(copy, aspect_mask); |
| 464 | { | 715 | }); |
| 465 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | 716 | const VkImage dst_image = dst.Handle(); |
| 466 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | 717 | const VkImage src_image = src.Handle(); |
| 467 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | 718 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 468 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | 719 | scheduler.Record([dst_image, src_image, aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { |
| 720 | RangedBarrierRange dst_range; | ||
| 721 | RangedBarrierRange src_range; | ||
| 722 | for (const VkImageCopy& copy : vk_copies) { | ||
| 723 | dst_range.AddLayers(copy.dstSubresource); | ||
| 724 | src_range.AddLayers(copy.srcSubresource); | ||
| 725 | } | ||
| 726 | const std::array read_barriers{ | ||
| 727 | VkImageMemoryBarrier{ | ||
| 728 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 729 | .pNext = nullptr, | ||
| 730 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||
| 731 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||
| 732 | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 733 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 734 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 735 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 736 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 737 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 738 | .image = src_image, | ||
| 739 | .subresourceRange = src_range.SubresourceRange(aspect_mask), | ||
| 469 | }, | 740 | }, |
| 470 | .subresourceRange = | 741 | VkImageMemoryBarrier{ |
| 471 | { | 742 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 472 | .aspectMask = aspect_mask, | 743 | .pNext = nullptr, |
| 473 | .baseMipLevel = base_level, | 744 | .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | |
| 474 | .levelCount = num_levels, | 745 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | |
| 475 | .baseArrayLayer = 0, | 746 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | |
| 476 | .layerCount = 0, | 747 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | |
| 748 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||
| 749 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 750 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 751 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 752 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 753 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 754 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 755 | .image = dst_image, | ||
| 756 | .subresourceRange = dst_range.SubresourceRange(aspect_mask), | ||
| 477 | }, | 757 | }, |
| 478 | }; | 758 | }; |
| 479 | if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { | 759 | const VkImageMemoryBarrier write_barrier{ |
| 480 | ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; | 760 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 481 | ci.subresourceRange.baseArrayLayer = base_slice; | 761 | .pNext = nullptr, |
| 482 | ci.subresourceRange.layerCount = num_slices; | 762 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 763 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | | ||
| 764 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | ||
| 765 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||
| 766 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||
| 767 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||
| 768 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 769 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 770 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 771 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 772 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 773 | .image = dst_image, | ||
| 774 | .subresourceRange = dst_range.SubresourceRange(aspect_mask), | ||
| 775 | }; | ||
| 776 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 777 | 0, {}, {}, read_barriers); | ||
| 778 | cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, | ||
| 779 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies); | ||
| 780 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 781 | 0, write_barrier); | ||
| 782 | }); | ||
| 783 | } | ||
| 784 | |||
| 785 | Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, | ||
| 786 | VAddr cpu_addr_) | ||
| 787 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, | ||
| 788 | image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), | ||
| 789 | aspect_mask(ImageAspectMask(info.format)) { | ||
| 790 | if (image) { | ||
| 791 | commit = runtime.memory_manager.Commit(image, false); | ||
| 483 | } else { | 792 | } else { |
| 484 | ci.viewType = image_view_type; | 793 | commit = runtime.memory_manager.Commit(buffer, false); |
| 485 | ci.subresourceRange.baseArrayLayer = base_layer; | 794 | } |
| 486 | ci.subresourceRange.layerCount = num_layers; | 795 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { |
| 796 | flags |= VideoCommon::ImageFlagBits::Converted; | ||
| 797 | } | ||
| 798 | if (runtime.device.HasDebuggingToolAttached()) { | ||
| 799 | if (image) { | ||
| 800 | image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | ||
| 801 | } else { | ||
| 802 | buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | ||
| 803 | } | ||
| 487 | } | 804 | } |
| 488 | render_target = device.GetLogical().CreateImageView(ci); | ||
| 489 | return *render_target; | ||
| 490 | } | 805 | } |
| 491 | 806 | ||
| 492 | VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer_, | 807 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 493 | Tegra::Engines::Maxwell3D& maxwell3d_, | 808 | std::span<const BufferImageCopy> copies) { |
| 494 | Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, | 809 | // TODO: Move this to another API |
| 495 | VKMemoryManager& memory_manager_, VKScheduler& scheduler_, | 810 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 496 | VKStagingBufferPool& staging_pool_) | 811 | std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); |
| 497 | : TextureCache(rasterizer_, maxwell3d_, gpu_memory_, device_.IsOptimalAstcSupported()), | 812 | const VkBuffer src_buffer = map.handle; |
| 498 | device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ | 813 | const VkImage vk_image = *image; |
| 499 | staging_pool_} {} | 814 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; |
| 500 | 815 | const bool is_initialized = std::exchange(initialized, true); | |
| 501 | VKTextureCache::~VKTextureCache() = default; | 816 | scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized, |
| 502 | 817 | vk_copies](vk::CommandBuffer cmdbuf) { | |
| 503 | Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { | 818 | CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies); |
| 504 | return std::make_shared<CachedSurface>(device, memory_manager, scheduler, staging_pool, | 819 | }); |
| 505 | gpu_addr, params); | ||
| 506 | } | 820 | } |
| 507 | 821 | ||
| 508 | void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, | 822 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 509 | const VideoCommon::CopyParams& copy_params) { | 823 | std::span<const VideoCommon::BufferCopy> copies) { |
| 510 | const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; | 824 | // TODO: Move this to another API |
| 511 | const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; | 825 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 512 | UNIMPLEMENTED_IF(src_3d); | 826 | std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); |
| 827 | const VkBuffer src_buffer = map.handle; | ||
| 828 | const VkBuffer dst_buffer = *buffer; | ||
| 829 | scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { | ||
| 830 | // TODO: Barriers | ||
| 831 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); | ||
| 832 | }); | ||
| 833 | } | ||
| 513 | 834 | ||
| 514 | // The texture cache handles depth in OpenGL terms; we have to handle it as a subresource | 835 | void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 515 | // and a dimension, respectively. | 836 | std::span<const BufferImageCopy> copies) { |
| 516 | const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z; | 837 | std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); |
| 517 | const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0; | 838 | scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask, |
| 839 | vk_copies](vk::CommandBuffer cmdbuf) { | ||
| 840 | // TODO: Barriers | ||
| 841 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); | ||
| 842 | }); | ||
| 843 | } | ||
| 518 | 844 | ||
| 519 | const u32 extent_z = dst_3d ? copy_params.depth : 1; | 845 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, |
| 520 | const u32 num_layers = dst_3d ? 1 : copy_params.depth; | 846 | ImageId image_id_, Image& image) |
| 847 | : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, | ||
| 848 | image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount( | ||
| 849 | image.info.num_samples)} { | ||
| 850 | const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); | ||
| 851 | std::array<SwizzleSource, 4> swizzle{ | ||
| 852 | SwizzleSource::R, | ||
| 853 | SwizzleSource::G, | ||
| 854 | SwizzleSource::B, | ||
| 855 | SwizzleSource::A, | ||
| 856 | }; | ||
| 857 | if (!info.IsRenderTarget()) { | ||
| 858 | swizzle = info.Swizzle(); | ||
| 859 | if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) { | ||
| 860 | std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); | ||
| 861 | } | ||
| 862 | } | ||
| 863 | const VkFormat vk_format = | ||
| 864 | MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format; | ||
| 865 | const VkImageViewCreateInfo create_info{ | ||
| 866 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | ||
| 867 | .pNext = nullptr, | ||
| 868 | .flags = 0, | ||
| 869 | .image = image.Handle(), | ||
| 870 | .viewType = VkImageViewType{}, | ||
| 871 | .format = vk_format, | ||
| 872 | .components{ | ||
| 873 | .r = ComponentSwizzle(swizzle[0]), | ||
| 874 | .g = ComponentSwizzle(swizzle[1]), | ||
| 875 | .b = ComponentSwizzle(swizzle[2]), | ||
| 876 | .a = ComponentSwizzle(swizzle[3]), | ||
| 877 | }, | ||
| 878 | .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), | ||
| 879 | }; | ||
| 880 | const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) { | ||
| 881 | VkImageViewCreateInfo ci{create_info}; | ||
| 882 | ci.viewType = ImageViewType(view_type); | ||
| 883 | if (num_layers) { | ||
| 884 | ci.subresourceRange.layerCount = *num_layers; | ||
| 885 | } | ||
| 886 | vk::ImageView handle = device->GetLogical().CreateImageView(ci); | ||
| 887 | if (device->HasDebuggingToolAttached()) { | ||
| 888 | handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); | ||
| 889 | } | ||
| 890 | image_views[static_cast<size_t>(view_type)] = std::move(handle); | ||
| 891 | }; | ||
| 892 | switch (info.type) { | ||
| 893 | case VideoCommon::ImageViewType::e1D: | ||
| 894 | case VideoCommon::ImageViewType::e1DArray: | ||
| 895 | create(VideoCommon::ImageViewType::e1D, 1); | ||
| 896 | create(VideoCommon::ImageViewType::e1DArray, std::nullopt); | ||
| 897 | render_target = Handle(VideoCommon::ImageViewType::e1DArray); | ||
| 898 | break; | ||
| 899 | case VideoCommon::ImageViewType::e2D: | ||
| 900 | case VideoCommon::ImageViewType::e2DArray: | ||
| 901 | create(VideoCommon::ImageViewType::e2D, 1); | ||
| 902 | create(VideoCommon::ImageViewType::e2DArray, std::nullopt); | ||
| 903 | render_target = Handle(VideoCommon::ImageViewType::e2DArray); | ||
| 904 | break; | ||
| 905 | case VideoCommon::ImageViewType::e3D: | ||
| 906 | create(VideoCommon::ImageViewType::e3D, std::nullopt); | ||
| 907 | render_target = Handle(VideoCommon::ImageViewType::e3D); | ||
| 908 | break; | ||
| 909 | case VideoCommon::ImageViewType::Cube: | ||
| 910 | case VideoCommon::ImageViewType::CubeArray: | ||
| 911 | create(VideoCommon::ImageViewType::Cube, 6); | ||
| 912 | create(VideoCommon::ImageViewType::CubeArray, std::nullopt); | ||
| 913 | break; | ||
| 914 | case VideoCommon::ImageViewType::Rect: | ||
| 915 | UNIMPLEMENTED(); | ||
| 916 | break; | ||
| 917 | case VideoCommon::ImageViewType::Buffer: | ||
| 918 | buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ | ||
| 919 | .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, | ||
| 920 | .pNext = nullptr, | ||
| 921 | .flags = 0, | ||
| 922 | .buffer = image.Buffer(), | ||
| 923 | .format = vk_format, | ||
| 924 | .offset = 0, // TODO: Redesign buffer cache to support this | ||
| 925 | .range = image.guest_size_bytes, | ||
| 926 | }); | ||
| 927 | break; | ||
| 928 | } | ||
| 929 | } | ||
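The constructor above stamps one base VkImageViewCreateInfo into several view types through its `create` lambda, so both the plain and the array flavour of a view exist up front. A reduced sketch of that fan-out, assuming a fully filled-in base create info (names are illustrative):

    #include <vulkan/vulkan.h>

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <optional>

    // Sketch: copy the base create info per view type, overriding only viewType
    // and, for non-array views, layerCount.
    void MakeViews(VkDevice device, const VkImageViewCreateInfo& base,
                   std::array<VkImageView, 2>& views) {
        const auto create = [&](std::size_t slot, VkImageViewType type,
                                std::optional<uint32_t> num_layers) {
            VkImageViewCreateInfo ci = base;
            ci.viewType = type;
            if (num_layers) {
                ci.subresourceRange.layerCount = *num_layers;  // clamp to one layer
            }
            vkCreateImageView(device, &ci, nullptr, &views[slot]);
        };
        create(0, VK_IMAGE_VIEW_TYPE_2D, 1);                   // plain view
        create(1, VK_IMAGE_VIEW_TYPE_2D_ARRAY, std::nullopt);  // full layer range
    }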
| 521 | 930 | ||
| 522 | // We can't copy inside a renderpass | 931 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) |
| 523 | scheduler.RequestOutsideRenderPassOperationContext(); | 932 | : VideoCommon::ImageViewBase{params} {} |
| 524 | 933 | ||
| 525 | src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1, | 934 | VkImageView ImageView::DepthView() { |
| 526 | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, | 935 | if (depth_view) { |
| 527 | VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); | 936 | return *depth_view; |
| 528 | dst_surface->Transition(dst_base_layer, num_layers, copy_params.dest_level, 1, | 937 | } |
| 529 | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, | 938 | depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); |
| 530 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); | 939 | return *depth_view; |
| 940 | } | ||
| 531 | 941 | ||
| 532 | const VkImageCopy copy{ | 942 | VkImageView ImageView::StencilView() { |
| 533 | .srcSubresource = | 943 | if (stencil_view) { |
| 534 | { | 944 | return *stencil_view; |
| 535 | .aspectMask = src_surface->GetAspectMask(), | 945 | } |
| 536 | .mipLevel = copy_params.source_level, | 946 | stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); |
| 537 | .baseArrayLayer = copy_params.source_z, | 947 | return *stencil_view; |
| 538 | .layerCount = num_layers, | 948 | } |
| 539 | }, | ||
| 540 | .srcOffset = | ||
| 541 | { | ||
| 542 | .x = static_cast<s32>(copy_params.source_x), | ||
| 543 | .y = static_cast<s32>(copy_params.source_y), | ||
| 544 | .z = 0, | ||
| 545 | }, | ||
| 546 | .dstSubresource = | ||
| 547 | { | ||
| 548 | .aspectMask = dst_surface->GetAspectMask(), | ||
| 549 | .mipLevel = copy_params.dest_level, | ||
| 550 | .baseArrayLayer = dst_base_layer, | ||
| 551 | .layerCount = num_layers, | ||
| 552 | }, | ||
| 553 | .dstOffset = | ||
| 554 | { | ||
| 555 | .x = static_cast<s32>(copy_params.dest_x), | ||
| 556 | .y = static_cast<s32>(copy_params.dest_y), | ||
| 557 | .z = static_cast<s32>(dst_offset_z), | ||
| 558 | }, | ||
| 559 | .extent = | ||
| 560 | { | ||
| 561 | .width = copy_params.width, | ||
| 562 | .height = copy_params.height, | ||
| 563 | .depth = extent_z, | ||
| 564 | }, | ||
| 565 | }; | ||
| 566 | 949 | ||
| 567 | const VkImage src_image = src_surface->GetImageHandle(); | 950 | vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { |
| 568 | const VkImage dst_image = dst_surface->GetImageHandle(); | 951 | return device->GetLogical().CreateImageView({ |
| 569 | scheduler.Record([src_image, dst_image, copy](vk::CommandBuffer cmdbuf) { | 952 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| 570 | cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, | 953 | .pNext = nullptr, |
| 571 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); | 954 | .flags = 0, |
| 955 | .image = image_handle, | ||
| 956 | .viewType = ImageViewType(type), | ||
| 957 | .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format, | ||
| 958 | .components{ | ||
| 959 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 960 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 961 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 962 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 963 | }, | ||
| 964 | .subresourceRange = MakeSubresourceRange(aspect_mask, range), | ||
| 572 | }); | 965 | }); |
| 573 | } | 966 | } |
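DepthView() and StencilView() above create their single-aspect views lazily and memoize the handle, since most image views never need a depth-only or stencil-only alias. The memoization in isolation, where make_view stands in for MakeDepthStencilView (an assumed stand-in, not the real signature):

    #include <vulkan/vulkan.h>

    #include <functional>
    #include <optional>

    // Sketch: build the single-aspect view on first request, return the cached
    // handle afterwards.
    VkImageView GetLazyAspectView(
        std::optional<VkImageView>& cache, VkImageAspectFlags aspect,
        const std::function<VkImageView(VkImageAspectFlags)>& make_view) {
        if (!cache) {
            cache = make_view(aspect);  // e.g. VK_IMAGE_ASPECT_DEPTH_BIT
        }
        return *cache;
    }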
| 574 | 967 | ||
| 575 | void VKTextureCache::ImageBlit(View& src_view, View& dst_view, | 968 | Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& tsc) { |
| 576 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 969 | const auto& device = runtime.device; |
| 577 | // We can't blit inside a renderpass | 970 | const bool arbitrary_borders = runtime.device.IsExtCustomBorderColorSupported(); |
| 578 | scheduler.RequestOutsideRenderPassOperationContext(); | 971 | const std::array<float, 4> color = tsc.BorderColor(); |
| 579 | 972 | // C++20 bit_cast | |
| 580 | src_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, | 973 | VkClearColorValue border_color; |
| 581 | VK_ACCESS_TRANSFER_READ_BIT); | 974 | std::memcpy(&border_color, &color, sizeof(color)); |
| 582 | dst_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, | 975 | const VkSamplerCustomBorderColorCreateInfoEXT border_ci{ |
| 583 | VK_ACCESS_TRANSFER_WRITE_BIT); | 976 | .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, |
| 584 | 977 | .pNext = nullptr, | |
| 585 | VkImageBlit blit; | 978 | .customBorderColor = border_color, |
| 586 | blit.srcSubresource = src_view->GetImageSubresourceLayers(); | 979 | .format = VK_FORMAT_UNDEFINED, |
| 587 | blit.srcOffsets[0].x = copy_config.src_rect.left; | 980 | }; |
| 588 | blit.srcOffsets[0].y = copy_config.src_rect.top; | 981 | const void* pnext = nullptr; |
| 589 | blit.srcOffsets[0].z = 0; | 982 | if (arbitrary_borders) { |
| 590 | blit.srcOffsets[1].x = copy_config.src_rect.right; | 983 | pnext = &border_ci; |
| 591 | blit.srcOffsets[1].y = copy_config.src_rect.bottom; | 984 | } |
| 592 | blit.srcOffsets[1].z = 1; | 985 | const VkSamplerReductionModeCreateInfoEXT reduction_ci{ |
| 593 | blit.dstSubresource = dst_view->GetImageSubresourceLayers(); | 986 | .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT, |
| 594 | blit.dstOffsets[0].x = copy_config.dst_rect.left; | 987 | .pNext = pnext, |
| 595 | blit.dstOffsets[0].y = copy_config.dst_rect.top; | 988 | .reductionMode = MaxwellToVK::SamplerReduction(tsc.reduction_filter), |
| 596 | blit.dstOffsets[0].z = 0; | 989 | }; |
| 597 | blit.dstOffsets[1].x = copy_config.dst_rect.right; | 990 | if (runtime.device.IsExtSamplerFilterMinmaxSupported()) { |
| 598 | blit.dstOffsets[1].y = copy_config.dst_rect.bottom; | 991 | pnext = &reduction_ci; |
| 599 | blit.dstOffsets[1].z = 1; | 992 | } else if (reduction_ci.reductionMode != VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT) { |
| 600 | 993 | LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required"); | |
| 601 | const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; | 994 | } |
| 602 | 995 | // Some games have samplers with garbage. Sanitize them here. | |
| 603 | scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, | 996 | const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); |
| 604 | is_linear](vk::CommandBuffer cmdbuf) { | 997 | sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ |
| 605 | cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, | 998 | .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, |
| 606 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit, | 999 | .pNext = pnext, |
| 607 | is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); | 1000 | .flags = 0, |
| 1001 | .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), | ||
| 1002 | .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), | ||
| 1003 | .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), | ||
| 1004 | .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), | ||
| 1005 | .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), | ||
| 1006 | .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), | ||
| 1007 | .mipLodBias = tsc.LodBias(), | ||
| 1008 | .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE), | ||
| 1009 | .maxAnisotropy = max_anisotropy, | ||
| 1010 | .compareEnable = tsc.depth_compare_enabled, | ||
| 1011 | .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), | ||
| 1012 | .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), | ||
| 1013 | .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), | ||
| 1014 | .borderColor = | ||
| 1015 | arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), | ||
| 1016 | .unnormalizedCoordinates = VK_FALSE, | ||
| 608 | }); | 1017 | }); |
| 609 | } | 1018 | } |
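The sampler constructor above threads up to two extension structs through pNext: the custom border colour struct forms the chain tail, and the reduction-mode struct, when supported, becomes the new head. A sketch of just that chain assembly, with the device capability queries replaced by plain booleans (names are illustrative):

    #include <vulkan/vulkan.h>

    // Sketch: when both extensions are enabled the chain reads
    // VkSamplerCreateInfo -> reduction_ci -> border_ci.
    const void* BuildSamplerChain(bool has_border, bool has_minmax,
                                  VkSamplerCustomBorderColorCreateInfoEXT& border_ci,
                                  VkSamplerReductionModeCreateInfoEXT& reduction_ci) {
        const void* pnext = nullptr;
        if (has_border) {
            pnext = &border_ci;      // border struct becomes the chain tail
        }
        reduction_ci.pNext = pnext;  // reduction struct links to whatever precedes it
        if (has_minmax) {
            pnext = &reduction_ci;   // ...and becomes the new chain head
        }
        return pnext;                // feeds VkSamplerCreateInfo::pNext
    }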
| 610 | 1019 | ||
| 611 | void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) { | 1020 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, |
| 612 | // Currently unimplemented. PBO copies should be dropped and we should use a render pass to | 1021 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { |
| 613 | // convert from color to depth and viceversa. | 1022 | std::vector<VkAttachmentDescription> descriptions; |
| 614 | LOG_WARNING(Render_Vulkan, "Unimplemented"); | 1023 | std::vector<VkImageView> attachments; |
| 1024 | RenderPassKey renderpass_key{}; | ||
| 1025 | s32 num_layers = 1; | ||
| 1026 | |||
| 1027 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 1028 | const ImageView* const color_buffer = color_buffers[index]; | ||
| 1029 | if (!color_buffer) { | ||
| 1030 | renderpass_key.color_formats[index] = PixelFormat::Invalid; | ||
| 1031 | continue; | ||
| 1032 | } | ||
| 1033 | descriptions.push_back(AttachmentDescription(runtime.device, color_buffer)); | ||
| 1034 | attachments.push_back(color_buffer->RenderTarget()); | ||
| 1035 | renderpass_key.color_formats[index] = color_buffer->format; | ||
| 1036 | num_layers = std::max(num_layers, color_buffer->range.extent.layers); | ||
| 1037 | images[num_images] = color_buffer->ImageHandle(); | ||
| 1038 | image_ranges[num_images] = MakeSubresourceRange(color_buffer); | ||
| 1039 | samples = color_buffer->Samples(); | ||
| 1040 | ++num_images; | ||
| 1041 | } | ||
| 1042 | const size_t num_colors = attachments.size(); | ||
| 1043 | const VkAttachmentReference* depth_attachment = | ||
| 1044 | depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr; | ||
| 1045 | if (depth_buffer) { | ||
| 1046 | descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer)); | ||
| 1047 | attachments.push_back(depth_buffer->RenderTarget()); | ||
| 1048 | renderpass_key.depth_format = depth_buffer->format; | ||
| 1049 | num_layers = std::max(num_layers, depth_buffer->range.extent.layers); | ||
| 1050 | images[num_images] = depth_buffer->ImageHandle(); | ||
| 1051 | image_ranges[num_images] = MakeSubresourceRange(depth_buffer); | ||
| 1052 | samples = depth_buffer->Samples(); | ||
| 1053 | ++num_images; | ||
| 1054 | } else { | ||
| 1055 | renderpass_key.depth_format = PixelFormat::Invalid; | ||
| 1056 | } | ||
| 1057 | renderpass_key.samples = samples; | ||
| 1058 | |||
| 1059 | const auto& device = runtime.device.GetLogical(); | ||
| 1060 | const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); | ||
| 1061 | if (is_new) { | ||
| 1062 | const VkSubpassDescription subpass{ | ||
| 1063 | .flags = 0, | ||
| 1064 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 1065 | .inputAttachmentCount = 0, | ||
| 1066 | .pInputAttachments = nullptr, | ||
| 1067 | .colorAttachmentCount = static_cast<u32>(num_colors), | ||
| 1068 | .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, | ||
| 1069 | .pResolveAttachments = nullptr, | ||
| 1070 | .pDepthStencilAttachment = depth_attachment, | ||
| 1071 | .preserveAttachmentCount = 0, | ||
| 1072 | .pPreserveAttachments = nullptr, | ||
| 1073 | }; | ||
| 1074 | cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ | ||
| 1075 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, | ||
| 1076 | .pNext = nullptr, | ||
| 1077 | .flags = 0, | ||
| 1078 | .attachmentCount = static_cast<u32>(descriptions.size()), | ||
| 1079 | .pAttachments = descriptions.data(), | ||
| 1080 | .subpassCount = 1, | ||
| 1081 | .pSubpasses = &subpass, | ||
| 1082 | .dependencyCount = 0, | ||
| 1083 | .pDependencies = nullptr, | ||
| 1084 | }); | ||
| 1085 | } | ||
| 1086 | renderpass = *cache_pair->second; | ||
| 1087 | render_area = VkExtent2D{ | ||
| 1088 | .width = key.size.width, | ||
| 1089 | .height = key.size.height, | ||
| 1090 | }; | ||
| 1091 | num_color_buffers = static_cast<u32>(num_colors); | ||
| 1092 | framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ | ||
| 1093 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, | ||
| 1094 | .pNext = nullptr, | ||
| 1095 | .flags = 0, | ||
| 1096 | .renderPass = renderpass, | ||
| 1097 | .attachmentCount = static_cast<u32>(attachments.size()), | ||
| 1098 | .pAttachments = attachments.data(), | ||
| 1099 | .width = key.size.width, | ||
| 1100 | .height = key.size.height, | ||
| 1101 | .layers = static_cast<u32>(num_layers), | ||
| 1102 | }); | ||
| 1103 | if (runtime.device.HasDebuggingToolAttached()) { | ||
| 1104 | framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); | ||
| 1105 | } | ||
| 615 | } | 1106 | } |
| 616 | 1107 | ||
| 617 | } // namespace Vulkan | 1108 | } // namespace Vulkan |
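Framebuffer construction above keys a render pass cache on RenderPassKey and only builds the pass when try_emplace reports a fresh slot. The lookup in isolation, using a raw VkRenderPass instead of the wrapper type, where make_render_pass stands in for the vkCreateRenderPass call:

    #include <vulkan/vulkan.h>

    #include <unordered_map>

    // Sketch: try_emplace reports whether the slot is new, so the expensive
    // object is built only on a miss. Key must be hashable and equality
    // comparable, which RenderPassKey provides via its defaulted operator<=>
    // and the std::hash specialization in the header below.
    template <typename Key, typename MakeFn>
    VkRenderPass GetOrCreateRenderPass(std::unordered_map<Key, VkRenderPass>& cache,
                                       const Key& key, MakeFn&& make_render_pass) {
        const auto [it, is_new] = cache.try_emplace(key);
        if (is_new) {
            it->second = make_render_pass(key);  // vkCreateRenderPass happens here
        }
        return it->second;
    }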
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index b0be4cb0f..92a7aad8b 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -4,217 +4,270 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <compare> |
| 8 | #include <unordered_map> | 8 | #include <span> |
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_image.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 10 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 13 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 14 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 15 | #include "video_core/texture_cache/surface_base.h" | ||
| 16 | #include "video_core/texture_cache/texture_cache.h" | 11 | #include "video_core/texture_cache/texture_cache.h" |
| 17 | 12 | #include "video_core/vulkan_common/vulkan_wrapper.h" | |
| 18 | namespace VideoCore { | ||
| 19 | class RasterizerInterface; | ||
| 20 | } | ||
| 21 | 13 | ||
| 22 | namespace Vulkan { | 14 | namespace Vulkan { |
| 23 | 15 | ||
| 24 | class RasterizerVulkan; | 16 | using VideoCommon::ImageId; |
| 25 | class VKDevice; | 17 | using VideoCommon::NUM_RT; |
| 18 | using VideoCommon::Offset2D; | ||
| 19 | using VideoCommon::RenderTargets; | ||
| 20 | using VideoCore::Surface::PixelFormat; | ||
| 21 | |||
| 26 | class VKScheduler; | 22 | class VKScheduler; |
| 27 | class VKStagingBufferPool; | 23 | class VKStagingBufferPool; |
| 28 | 24 | ||
| 29 | class CachedSurfaceView; | 25 | class BlitImageHelper; |
| 30 | class CachedSurface; | 26 | class Device; |
| 27 | class Image; | ||
| 28 | class ImageView; | ||
| 29 | class Framebuffer; | ||
| 31 | 30 | ||
| 32 | using Surface = std::shared_ptr<CachedSurface>; | 31 | struct RenderPassKey { |
| 33 | using View = std::shared_ptr<CachedSurfaceView>; | 32 | constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; |
| 34 | using TextureCacheBase = VideoCommon::TextureCache<Surface, View>; | ||
| 35 | 33 | ||
| 36 | using VideoCommon::SurfaceParams; | 34 | std::array<PixelFormat, NUM_RT> color_formats; |
| 37 | using VideoCommon::ViewParams; | 35 | PixelFormat depth_format; |
| 36 | VkSampleCountFlagBits samples; | ||
| 37 | }; | ||
| 38 | 38 | ||
| 39 | class CachedSurface final : public VideoCommon::SurfaceBase<View> { | 39 | } // namespace Vulkan |
| 40 | friend CachedSurfaceView; | ||
| 41 | 40 | ||
| 42 | public: | 41 | namespace std { |
| 43 | explicit CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, | 42 | template <> |
| 44 | VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, | 43 | struct hash<Vulkan::RenderPassKey> { |
| 45 | GPUVAddr gpu_addr_, const SurfaceParams& params_); | 44 | [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { |
| 46 | ~CachedSurface(); | 45 | size_t value = static_cast<size_t>(key.depth_format) << 48; |
| 46 | value ^= static_cast<size_t>(key.samples) << 52; | ||
| 47 | for (size_t i = 0; i < key.color_formats.size(); ++i) { | ||
| 48 | value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); | ||
| 49 | } | ||
| 50 | return value; | ||
| 51 | } | ||
| 52 | }; | ||
| 53 | } // namespace std | ||
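For reference, the bit layout this hash produces: colour attachment i lands at bit 6 * i (bits 0 through 47 for the eight render targets), the depth format at bit 48 and the sample count at bit 52; adjacent fields deliberately overlap by a few bits and are mixed with XOR rather than kept disjoint. A standalone mirror of the computation with the enums reduced to plain integers:

    #include <array>
    #include <cstddef>

    // Sketch: same shifts and XOR mixing as the std::hash specialization above.
    constexpr std::size_t RenderPassKeyHash(const std::array<std::size_t, 8>& color_formats,
                                            std::size_t depth_format, std::size_t samples) {
        std::size_t value = depth_format << 48;
        value ^= samples << 52;
        for (std::size_t i = 0; i < color_formats.size(); ++i) {
            value ^= color_formats[i] << (i * 6);
        }
        return value;
    }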
| 47 | 54 | ||
| 48 | void UploadTexture(const std::vector<u8>& staging_buffer) override; | 55 | namespace Vulkan { |
| 49 | void DownloadTexture(std::vector<u8>& staging_buffer) override; | ||
| 50 | 56 | ||
| 51 | void FullTransition(VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, | 57 | struct ImageBufferMap { |
| 52 | VkImageLayout new_layout) { | 58 | [[nodiscard]] VkBuffer Handle() const noexcept { |
| 53 | image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels, | 59 | return handle; |
| 54 | new_stage_mask, new_access, new_layout); | ||
| 55 | } | 60 | } |
| 56 | 61 | ||
| 57 | void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | 62 | [[nodiscard]] std::span<u8> Span() const noexcept { |
| 58 | VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, | 63 | return map.Span(); |
| 59 | VkImageLayout new_layout) { | ||
| 60 | image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, | ||
| 61 | new_access, new_layout); | ||
| 62 | } | 64 | } |
| 63 | 65 | ||
| 64 | VKImage& GetImage() { | 66 | VkBuffer handle; |
| 65 | return *image; | 67 | MemoryMap map; |
| 66 | } | 68 | }; |
| 67 | 69 | ||
| 68 | const VKImage& GetImage() const { | 70 | struct TextureCacheRuntime { |
| 69 | return *image; | 71 | const Device& device; |
| 72 | VKScheduler& scheduler; | ||
| 73 | VKMemoryManager& memory_manager; | ||
| 74 | VKStagingBufferPool& staging_buffer_pool; | ||
| 75 | BlitImageHelper& blit_image_helper; | ||
| 76 | std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache; | ||
| 77 | |||
| 78 | void Finish(); | ||
| 79 | |||
| 80 | [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); | ||
| 81 | |||
| 82 | [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size) { | ||
| 83 | // TODO: Have a special function for this | ||
| 84 | return MapUploadBuffer(size); | ||
| 70 | } | 85 | } |
| 71 | 86 | ||
| 72 | VkImage GetImageHandle() const { | 87 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| 73 | return *image->GetHandle(); | 88 | const std::array<Offset2D, 2>& dst_region, |
| 89 | const std::array<Offset2D, 2>& src_region, | ||
| 90 | Tegra::Engines::Fermi2D::Filter filter, | ||
| 91 | Tegra::Engines::Fermi2D::Operation operation); | ||
| 92 | |||
| 93 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||
| 94 | |||
| 95 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); | ||
| 96 | |||
| 97 | [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { | ||
| 98 | return false; | ||
| 74 | } | 99 | } |
| 75 | 100 | ||
| 76 | VkImageAspectFlags GetAspectMask() const { | 101 | void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t, |
| 77 | return image->GetAspectMask(); | 102 | std::span<const VideoCommon::SwizzleParameters>) { |
| 103 | UNREACHABLE(); | ||
| 78 | } | 104 | } |
| 79 | 105 | ||
| 80 | VkBufferView GetBufferViewHandle() const { | 106 | void InsertUploadMemoryBarrier() {} |
| 81 | return *buffer_view; | 107 | |
| 108 | bool HasBrokenTextureViewFormats() const noexcept { | ||
| 109 | // No known Vulkan driver has broken image views | ||
| 110 | return false; | ||
| 82 | } | 111 | } |
| 112 | }; | ||
| 83 | 113 | ||
| 84 | protected: | 114 | class Image : public VideoCommon::ImageBase { |
| 85 | void DecorateSurfaceName() override; | 115 | public: |
| 116 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, | ||
| 117 | VAddr cpu_addr); | ||
| 86 | 118 | ||
| 87 | View CreateView(const ViewParams& view_params) override; | 119 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 120 | std::span<const VideoCommon::BufferImageCopy> copies); | ||
| 88 | 121 | ||
| 89 | private: | 122 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 90 | void UploadBuffer(const std::vector<u8>& staging_buffer); | 123 | std::span<const VideoCommon::BufferCopy> copies); |
| 91 | 124 | ||
| 92 | void UploadImage(const std::vector<u8>& staging_buffer); | 125 | void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 126 | std::span<const VideoCommon::BufferImageCopy> copies); | ||
| 93 | 127 | ||
| 94 | VkBufferImageCopy GetBufferImageCopy(u32 level) const; | 128 | [[nodiscard]] VkImage Handle() const noexcept { |
| 129 | return *image; | ||
| 130 | } | ||
| 95 | 131 | ||
| 96 | VkImageSubresourceRange GetImageSubresourceRange() const; | 132 | [[nodiscard]] VkBuffer Buffer() const noexcept { |
| 133 | return *buffer; | ||
| 134 | } | ||
| 97 | 135 | ||
| 98 | const VKDevice& device; | 136 | [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { |
| 99 | VKMemoryManager& memory_manager; | 137 | return aspect_mask; |
| 100 | VKScheduler& scheduler; | 138 | } |
| 101 | VKStagingBufferPool& staging_pool; | ||
| 102 | 139 | ||
| 103 | std::optional<VKImage> image; | 140 | private: |
| 141 | VKScheduler* scheduler; | ||
| 142 | vk::Image image; | ||
| 104 | vk::Buffer buffer; | 143 | vk::Buffer buffer; |
| 105 | vk::BufferView buffer_view; | ||
| 106 | VKMemoryCommit commit; | 144 | VKMemoryCommit commit; |
| 107 | 145 | VkImageAspectFlags aspect_mask = 0; | |
| 108 | VkFormat format = VK_FORMAT_UNDEFINED; | 146 | bool initialized = false; |
| 109 | }; | 147 | }; |
| 110 | 148 | ||
| 111 | class CachedSurfaceView final : public VideoCommon::ViewBase { | 149 | class ImageView : public VideoCommon::ImageViewBase { |
| 112 | public: | 150 | public: |
| 113 | explicit CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_, | 151 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); |
| 114 | const ViewParams& view_params_); | 152 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); |
| 115 | ~CachedSurfaceView(); | ||
| 116 | 153 | ||
| 117 | VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source, | 154 | [[nodiscard]] VkImageView DepthView(); |
| 118 | Tegra::Texture::SwizzleSource y_source, | ||
| 119 | Tegra::Texture::SwizzleSource z_source, | ||
| 120 | Tegra::Texture::SwizzleSource w_source); | ||
| 121 | 155 | ||
| 122 | VkImageView GetAttachment(); | 156 | [[nodiscard]] VkImageView StencilView(); |
| 123 | 157 | ||
| 124 | bool IsSameSurface(const CachedSurfaceView& rhs) const { | 158 | [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { |
| 125 | return &surface == &rhs.surface; | 159 | return *image_views[static_cast<size_t>(query_type)]; |
| 126 | } | 160 | } |
| 127 | 161 | ||
| 128 | u32 GetWidth() const { | 162 | [[nodiscard]] VkBufferView BufferView() const noexcept { |
| 129 | return surface_params.GetMipWidth(base_level); | 163 | return *buffer_view; |
| 130 | } | 164 | } |
| 131 | 165 | ||
| 132 | u32 GetHeight() const { | 166 | [[nodiscard]] VkImage ImageHandle() const noexcept { |
| 133 | return surface_params.GetMipHeight(base_level); | 167 | return image_handle; |
| 134 | } | 168 | } |
| 135 | 169 | ||
| 136 | u32 GetNumLayers() const { | 170 | [[nodiscard]] VkImageView RenderTarget() const noexcept { |
| 137 | return num_layers; | 171 | return render_target; |
| 138 | } | 172 | } |
| 139 | 173 | ||
| 140 | bool IsBufferView() const { | 174 | [[nodiscard]] PixelFormat ImageFormat() const noexcept { |
| 141 | return buffer_view; | 175 | return image_format; |
| 142 | } | 176 | } |
| 143 | 177 | ||
| 144 | VkImage GetImage() const { | 178 | [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { |
| 145 | return image; | 179 | return samples; |
| 146 | } | 180 | } |
| 147 | 181 | ||
| 148 | VkBufferView GetBufferView() const { | 182 | private: |
| 149 | return buffer_view; | 183 | [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); |
| 150 | } | ||
| 151 | 184 | ||
| 152 | VkImageSubresourceRange GetImageSubresourceRange() const { | 185 | const Device* device = nullptr; |
| 153 | return {aspect_mask, base_level, num_levels, base_layer, num_layers}; | 186 | std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views; |
| 154 | } | 187 | vk::ImageView depth_view; |
| 188 | vk::ImageView stencil_view; | ||
| 189 | vk::BufferView buffer_view; | ||
| 190 | VkImage image_handle = VK_NULL_HANDLE; | ||
| 191 | VkImageView render_target = VK_NULL_HANDLE; | ||
| 192 | PixelFormat image_format = PixelFormat::Invalid; | ||
| 193 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; | ||
| 194 | }; | ||
| 155 | 195 | ||
| 156 | VkImageSubresourceLayers GetImageSubresourceLayers() const { | 196 | class ImageAlloc : public VideoCommon::ImageAllocBase {}; |
| 157 | return {surface.GetAspectMask(), base_level, base_layer, num_layers}; | ||
| 158 | } | ||
| 159 | 197 | ||
| 160 | void Transition(VkImageLayout new_layout, VkPipelineStageFlags new_stage_mask, | 198 | class Sampler { |
| 161 | VkAccessFlags new_access) const { | 199 | public: |
| 162 | surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, | 200 | explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); |
| 163 | new_access, new_layout); | ||
| 164 | } | ||
| 165 | 201 | ||
| 166 | void MarkAsModified(u64 tick) { | 202 | [[nodiscard]] VkSampler Handle() const noexcept { |
| 167 | surface.MarkAsModified(true, tick); | 203 | return *sampler; |
| 168 | } | 204 | } |
| 169 | 205 | ||
| 170 | private: | 206 | private: |
| 171 | // Store a copy of these values to avoid double dereference when reading them | 207 | vk::Sampler sampler; |
| 172 | const SurfaceParams surface_params; | ||
| 173 | const VkImage image; | ||
| 174 | const VkBufferView buffer_view; | ||
| 175 | const VkImageAspectFlags aspect_mask; | ||
| 176 | |||
| 177 | const VKDevice& device; | ||
| 178 | CachedSurface& surface; | ||
| 179 | const u32 base_level; | ||
| 180 | const u32 num_levels; | ||
| 181 | const VkImageViewType image_view_type; | ||
| 182 | u32 base_layer = 0; | ||
| 183 | u32 num_layers = 0; | ||
| 184 | u32 base_slice = 0; | ||
| 185 | u32 num_slices = 0; | ||
| 186 | |||
| 187 | VkImageView last_image_view = nullptr; | ||
| 188 | u32 last_swizzle = 0; | ||
| 189 | |||
| 190 | vk::ImageView render_target; | ||
| 191 | std::unordered_map<u32, vk::ImageView> view_cache; | ||
| 192 | }; | 208 | }; |
| 193 | 209 | ||
| 194 | class VKTextureCache final : public TextureCacheBase { | 210 | class Framebuffer { |
| 195 | public: | 211 | public: |
| 196 | explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer_, | 212 | explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers, |
| 197 | Tegra::Engines::Maxwell3D& maxwell3d_, | 213 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key); |
| 198 | Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, | ||
| 199 | VKMemoryManager& memory_manager_, VKScheduler& scheduler_, | ||
| 200 | VKStagingBufferPool& staging_pool_); | ||
| 201 | ~VKTextureCache(); | ||
| 202 | 214 | ||
| 203 | private: | 215 | [[nodiscard]] VkFramebuffer Handle() const noexcept { |
| 204 | Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; | 216 | return *framebuffer; |
| 217 | } | ||
| 205 | 218 | ||
| 206 | void ImageCopy(Surface& src_surface, Surface& dst_surface, | 219 | [[nodiscard]] VkRenderPass RenderPass() const noexcept { |
| 207 | const VideoCommon::CopyParams& copy_params) override; | 220 | return renderpass; |
| 221 | } | ||
| 208 | 222 | ||
| 209 | void ImageBlit(View& src_view, View& dst_view, | 223 | [[nodiscard]] VkExtent2D RenderArea() const noexcept { |
| 210 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 224 | return render_area; |
| 225 | } | ||
| 211 | 226 | ||
| 212 | void BufferCopy(Surface& src_surface, Surface& dst_surface) override; | 227 | [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { |
| 228 | return samples; | ||
| 229 | } | ||
| 213 | 230 | ||
| 214 | const VKDevice& device; | 231 | [[nodiscard]] u32 NumColorBuffers() const noexcept { |
| 215 | VKMemoryManager& memory_manager; | 232 | return num_color_buffers; |
| 216 | VKScheduler& scheduler; | 233 | } |
| 217 | VKStagingBufferPool& staging_pool; | 234 | |
| 235 | [[nodiscard]] u32 NumImages() const noexcept { | ||
| 236 | return num_images; | ||
| 237 | } | ||
| 238 | |||
| 239 | [[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept { | ||
| 240 | return images; | ||
| 241 | } | ||
| 242 | |||
| 243 | [[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept { | ||
| 244 | return image_ranges; | ||
| 245 | } | ||
| 246 | |||
| 247 | private: | ||
| 248 | vk::Framebuffer framebuffer; | ||
| 249 | VkRenderPass renderpass{}; | ||
| 250 | VkExtent2D render_area{}; | ||
| 251 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; | ||
| 252 | u32 num_color_buffers = 0; | ||
| 253 | u32 num_images = 0; | ||
| 254 | std::array<VkImage, 9> images{}; | ||
| 255 | std::array<VkImageSubresourceRange, 9> image_ranges{}; | ||
| 218 | }; | 256 | }; |
| 219 | 257 | ||
| 258 | struct TextureCacheParams { | ||
| 259 | static constexpr bool ENABLE_VALIDATION = true; | ||
| 260 | static constexpr bool FRAMEBUFFER_BLITS = false; | ||
| 261 | static constexpr bool HAS_EMULATED_COPIES = false; | ||
| 262 | |||
| 263 | using Runtime = Vulkan::TextureCacheRuntime; | ||
| 264 | using Image = Vulkan::Image; | ||
| 265 | using ImageAlloc = Vulkan::ImageAlloc; | ||
| 266 | using ImageView = Vulkan::ImageView; | ||
| 267 | using Sampler = Vulkan::Sampler; | ||
| 268 | using Framebuffer = Vulkan::Framebuffer; | ||
| 269 | }; | ||
| 270 | |||
| 271 | using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; | ||
| 272 | |||
| 220 | } // namespace Vulkan | 273 | } // namespace Vulkan |
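TextureCacheParams above is a policy struct: the shared VideoCommon::TextureCache template pulls all backend types and compile-time switches from this single parameter instead of going through virtual interfaces. A reduced sketch of how such a template consumes the struct (names simplified; this is not the real VideoCommon implementation):

    // Sketch of the policy-struct pattern, under the assumption that P exposes
    // the same members as TextureCacheParams.
    template <class P>
    class GenericTextureCache {
    public:
        using Runtime = typename P::Runtime;  // backend entry points
        using Image = typename P::Image;      // backend image type

        explicit GenericTextureCache(Runtime& runtime_) : runtime{runtime_} {}

        void UploadImage(Image& image) {
            if constexpr (P::HAS_EMULATED_COPIES) {
                // backend-specific path compiled in only when the policy asks for it
            }
            (void)image;
        }

    private:
        Runtime& runtime;
    };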
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 8826da325..f99273c6a 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp | |||
| @@ -7,14 +7,14 @@ | |||
| 7 | 7 | ||
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 10 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 10 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 12 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 11 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 13 | #include "video_core/renderer_vulkan/wrapper.h" | 12 | #include "video_core/vulkan_common/vulkan_device.h" |
| 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 14 | 14 | ||
| 15 | namespace Vulkan { | 15 | namespace Vulkan { |
| 16 | 16 | ||
| 17 | VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_) | 17 | VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) |
| 18 | : device{device_}, scheduler{scheduler_} {} | 18 | : device{device_}, scheduler{scheduler_} {} |
| 19 | 19 | ||
| 20 | VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; | 20 | VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; |
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index f7e3c9821..e214f7195 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h | |||
| @@ -8,11 +8,11 @@ | |||
| 8 | #include <boost/container/static_vector.hpp> | 8 | #include <boost/container/static_vector.hpp> |
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/renderer_vulkan/wrapper.h" | 11 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 12 | 12 | ||
| 13 | namespace Vulkan { | 13 | namespace Vulkan { |
| 14 | 14 | ||
| 15 | class VKDevice; | 15 | class Device; |
| 16 | class VKScheduler; | 16 | class VKScheduler; |
| 17 | 17 | ||
| 18 | struct DescriptorUpdateEntry { | 18 | struct DescriptorUpdateEntry { |
| @@ -31,7 +31,7 @@ struct DescriptorUpdateEntry { | |||
| 31 | 31 | ||
| 32 | class VKUpdateDescriptorQueue final { | 32 | class VKUpdateDescriptorQueue final { |
| 33 | public: | 33 | public: |
| 34 | explicit VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_); | 34 | explicit VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_); |
| 35 | ~VKUpdateDescriptorQueue(); | 35 | ~VKUpdateDescriptorQueue(); |
| 36 | 36 | ||
| 37 | void TickFrame(); | 37 | void TickFrame(); |
| @@ -40,32 +40,36 @@ public: | |||
| 40 | 40 | ||
| 41 | void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); | 41 | void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); |
| 42 | 42 | ||
| 43 | void AddSampledImage(VkSampler sampler, VkImageView image_view) { | 43 | void AddSampledImage(VkImageView image_view, VkSampler sampler) { |
| 44 | payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); | 44 | payload.emplace_back(VkDescriptorImageInfo{ |
| 45 | .sampler = sampler, | ||
| 46 | .imageView = image_view, | ||
| 47 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 48 | }); | ||
| 45 | } | 49 | } |
| 46 | 50 | ||
| 47 | void AddImage(VkImageView image_view) { | 51 | void AddImage(VkImageView image_view) { |
| 48 | payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); | 52 | payload.emplace_back(VkDescriptorImageInfo{ |
| 53 | .sampler = VK_NULL_HANDLE, | ||
| 54 | .imageView = image_view, | ||
| 55 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 56 | }); | ||
| 49 | } | 57 | } |
| 50 | 58 | ||
| 51 | void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { | 59 | void AddBuffer(VkBuffer buffer, u64 offset, size_t size) { |
| 52 | payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); | 60 | payload.emplace_back(VkDescriptorBufferInfo{ |
| 61 | .buffer = buffer, | ||
| 62 | .offset = offset, | ||
| 63 | .range = size, | ||
| 64 | }); | ||
| 53 | } | 65 | } |
| 54 | 66 | ||
| 55 | void AddTexelBuffer(VkBufferView texel_buffer) { | 67 | void AddTexelBuffer(VkBufferView texel_buffer) { |
| 56 | payload.emplace_back(texel_buffer); | 68 | payload.emplace_back(texel_buffer); |
| 57 | } | 69 | } |
| 58 | 70 | ||
| 59 | VkImageLayout* LastImageLayout() { | ||
| 60 | return &payload.back().image.imageLayout; | ||
| 61 | } | ||
| 62 | |||
| 63 | const VkImageLayout* LastImageLayout() const { | ||
| 64 | return &payload.back().image.imageLayout; | ||
| 65 | } | ||
| 66 | |||
| 67 | private: | 71 | private: |
| 68 | const VKDevice& device; | 72 | const Device& device; |
| 69 | VKScheduler& scheduler; | 73 | VKScheduler& scheduler; |
| 70 | 74 | ||
| 71 | const DescriptorUpdateEntry* upload_start = nullptr; | 75 | const DescriptorUpdateEntry* upload_start = nullptr; |
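The payload helpers above switch from positional aggregate initialization to C++20 designated initializers and pin the layout to VK_IMAGE_LAYOUT_GENERAL, which is why the LastImageLayout() patch-up accessors could be deleted. The new shape of the construction, distilled into a free function (the function name is illustrative):

    #include <vulkan/vulkan.h>

    // Sketch: every field spelled out; no later layout patching needed.
    VkDescriptorImageInfo MakeSampledImageInfo(VkImageView image_view, VkSampler sampler) {
        return VkDescriptorImageInfo{
            .sampler = sampler,
            .imageView = image_view,
            .imageLayout = VK_IMAGE_LAYOUT_GENERAL,  // fixed layout
        };
    }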
diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp deleted file mode 100644 index 53c7ef12d..000000000 --- a/src/video_core/sampler_cache.cpp +++ /dev/null | |||
| @@ -1,21 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/cityhash.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/sampler_cache.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
| 11 | std::size_t SamplerCacheKey::Hash() const { | ||
| 12 | static_assert(sizeof(raw) % sizeof(u64) == 0); | ||
| 13 | return static_cast<std::size_t>( | ||
| 14 | Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64))); | ||
| 15 | } | ||
| 16 | |||
| 17 | bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const { | ||
| 18 | return raw == rhs.raw; | ||
| 19 | } | ||
| 20 | |||
| 21 | } // namespace VideoCommon | ||
diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h deleted file mode 100644 index cbe3ad071..000000000 --- a/src/video_core/sampler_cache.h +++ /dev/null | |||
| @@ -1,60 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include "video_core/textures/texture.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | struct SamplerCacheKey final : public Tegra::Texture::TSCEntry { | ||
| 15 | std::size_t Hash() const; | ||
| 16 | |||
| 17 | bool operator==(const SamplerCacheKey& rhs) const; | ||
| 18 | |||
| 19 | bool operator!=(const SamplerCacheKey& rhs) const { | ||
| 20 | return !operator==(rhs); | ||
| 21 | } | ||
| 22 | }; | ||
| 23 | |||
| 24 | } // namespace VideoCommon | ||
| 25 | |||
| 26 | namespace std { | ||
| 27 | |||
| 28 | template <> | ||
| 29 | struct hash<VideoCommon::SamplerCacheKey> { | ||
| 30 | std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept { | ||
| 31 | return k.Hash(); | ||
| 32 | } | ||
| 33 | }; | ||
| 34 | |||
| 35 | } // namespace std | ||
| 36 | |||
| 37 | namespace VideoCommon { | ||
| 38 | |||
| 39 | template <typename SamplerType, typename SamplerStorageType> | ||
| 40 | class SamplerCache { | ||
| 41 | public: | ||
| 42 | SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) { | ||
| 43 | const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc}); | ||
| 44 | auto& sampler = entry->second; | ||
| 45 | if (is_cache_miss) { | ||
| 46 | sampler = CreateSampler(tsc); | ||
| 47 | } | ||
| 48 | return ToSamplerType(sampler); | ||
| 49 | } | ||
| 50 | |||
| 51 | protected: | ||
| 52 | virtual SamplerStorageType CreateSampler(const Tegra::Texture::TSCEntry& tsc) const = 0; | ||
| 53 | |||
| 54 | virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0; | ||
| 55 | |||
| 56 | private: | ||
| 57 | std::unordered_map<SamplerCacheKey, SamplerStorageType> cache; | ||
| 58 | }; | ||
| 59 | |||
| 60 | } // namespace VideoCommon \ No newline at end of file | ||
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index 78245473c..9707136e9 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp | |||
| @@ -134,13 +134,12 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, | |||
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, | 136 | void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, |
| 137 | const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, | 137 | const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, |
| 138 | Vulkan::VKDescriptorPool& descriptor_pool, | 138 | Vulkan::VKDescriptorPool& descriptor_pool, |
| 139 | Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, | 139 | Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, |
| 140 | Vulkan::VKRenderPassCache& renderpass_cache, | ||
| 141 | std::vector<VkDescriptorSetLayoutBinding> bindings, | 140 | std::vector<VkDescriptorSetLayoutBinding> bindings, |
| 142 | Vulkan::SPIRVProgram program, | 141 | Vulkan::SPIRVProgram program, |
| 143 | Vulkan::GraphicsPipelineCacheKey key) { | 142 | Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { |
| 144 | std::unique_lock lock(queue_mutex); | 143 | std::unique_lock lock(queue_mutex); |
| 145 | pending_queue.push({ | 144 | pending_queue.push({ |
| 146 | .backend = Backend::Vulkan, | 145 | .backend = Backend::Vulkan, |
| @@ -149,10 +148,10 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, | |||
| 149 | .scheduler = &scheduler, | 148 | .scheduler = &scheduler, |
| 150 | .descriptor_pool = &descriptor_pool, | 149 | .descriptor_pool = &descriptor_pool, |
| 151 | .update_descriptor_queue = &update_descriptor_queue, | 150 | .update_descriptor_queue = &update_descriptor_queue, |
| 152 | .renderpass_cache = &renderpass_cache, | ||
| 153 | .bindings = std::move(bindings), | 151 | .bindings = std::move(bindings), |
| 154 | .program = std::move(program), | 152 | .program = std::move(program), |
| 155 | .key = key, | 153 | .key = key, |
| 154 | .num_color_buffers = num_color_buffers, | ||
| 156 | }); | 155 | }); |
| 157 | cv.notify_one(); | 156 | cv.notify_one(); |
| 158 | } | 157 | } |
| @@ -205,8 +204,8 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context | |||
| 205 | } else if (work.backend == Backend::Vulkan) { | 204 | } else if (work.backend == Backend::Vulkan) { |
| 206 | auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( | 205 | auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( |
| 207 | *work.vk_device, *work.scheduler, *work.descriptor_pool, | 206 | *work.vk_device, *work.scheduler, *work.descriptor_pool, |
| 208 | *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings, | 207 | *work.update_descriptor_queue, work.key, work.bindings, work.program, |
| 209 | work.program); | 208 | work.num_color_buffers); |
| 210 | 209 | ||
| 211 | work.pp_cache->EmplacePipeline(std::move(pipeline)); | 210 | work.pp_cache->EmplacePipeline(std::move(pipeline)); |
| 212 | } | 211 | } |
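QueueVulkanShader above is one half of a classic producer/consumer hand-off: push under the mutex, then cv.notify_one() to wake a single compiler thread. A self-contained sketch of both halves, reusing the diff's member names; WorkItem stands in for the aggregate the pending queue actually stores:

    #include <condition_variable>
    #include <mutex>
    #include <queue>
    #include <utility>

    struct WorkItem {
        int payload = 0;  // placeholder for the real shader work aggregate
    };

    class WorkQueue {
    public:
        void Push(WorkItem item) {
            {
                std::unique_lock lock{queue_mutex};
                pending_queue.push(std::move(item));
            }
            cv.notify_one();  // wake exactly one waiting worker
        }

        WorkItem Pop() {
            std::unique_lock lock{queue_mutex};
            cv.wait(lock, [this] { return !pending_queue.empty(); });
            WorkItem item = std::move(pending_queue.front());
            pending_queue.pop();
            return item;
        }

    private:
        std::mutex queue_mutex;
        std::condition_variable cv;
        std::queue<WorkItem> pending_queue;
    };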
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 5a7216019..0dbb1a31f 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h | |||
| @@ -24,9 +24,9 @@ | |||
| 24 | #include "video_core/renderer_opengl/gl_device.h" | 24 | #include "video_core/renderer_opengl/gl_device.h" |
| 25 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 25 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 26 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 26 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 27 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 27 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 29 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 28 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 29 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 30 | 30 | ||
| 31 | namespace Core::Frontend { | 31 | namespace Core::Frontend { |
| 32 | class EmuWindow; | 32 | class EmuWindow; |
| @@ -94,13 +94,13 @@ public: | |||
| 94 | CompilerSettings compiler_settings, const Registry& registry, | 94 | CompilerSettings compiler_settings, const Registry& registry, |
| 95 | VAddr cpu_addr); | 95 | VAddr cpu_addr); |
| 96 | 96 | ||
| 97 | void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device, | 97 | void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device, |
| 98 | Vulkan::VKScheduler& scheduler, | 98 | Vulkan::VKScheduler& scheduler, |
| 99 | Vulkan::VKDescriptorPool& descriptor_pool, | 99 | Vulkan::VKDescriptorPool& descriptor_pool, |
| 100 | Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, | 100 | Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, |
| 101 | Vulkan::VKRenderPassCache& renderpass_cache, | ||
| 102 | std::vector<VkDescriptorSetLayoutBinding> bindings, | 101 | std::vector<VkDescriptorSetLayoutBinding> bindings, |
| 103 | Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key); | 102 | Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, |
| 103 | u32 num_color_buffers); | ||
| 104 | 104 | ||
| 105 | private: | 105 | private: |
| 106 | void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); | 106 | void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); |
| @@ -123,14 +123,14 @@ private: | |||
| 123 | 123 | ||
| 124 | // For Vulkan | 124 | // For Vulkan |
| 125 | Vulkan::VKPipelineCache* pp_cache; | 125 | Vulkan::VKPipelineCache* pp_cache; |
| 126 | const Vulkan::VKDevice* vk_device; | 126 | const Vulkan::Device* vk_device; |
| 127 | Vulkan::VKScheduler* scheduler; | 127 | Vulkan::VKScheduler* scheduler; |
| 128 | Vulkan::VKDescriptorPool* descriptor_pool; | 128 | Vulkan::VKDescriptorPool* descriptor_pool; |
| 129 | Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; | 129 | Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; |
| 130 | Vulkan::VKRenderPassCache* renderpass_cache; | ||
| 131 | std::vector<VkDescriptorSetLayoutBinding> bindings; | 130 | std::vector<VkDescriptorSetLayoutBinding> bindings; |
| 132 | Vulkan::SPIRVProgram program; | 131 | Vulkan::SPIRVProgram program; |
| 133 | Vulkan::GraphicsPipelineCacheKey key; | 132 | Vulkan::GraphicsPipelineCacheKey key; |
| 133 | u32 num_color_buffers; | ||
| 134 | }; | 134 | }; |
| 135 | 135 | ||
| 136 | std::condition_variable cv; | 136 | std::condition_variable cv; |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index ab14c1aa3..6576d1208 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -25,7 +25,7 @@ using Tegra::Shader::OpCode; | |||
| 25 | namespace { | 25 | namespace { |
| 26 | 26 | ||
| 27 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, | 27 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, |
| 28 | const std::list<Sampler>& used_samplers) { | 28 | const std::list<SamplerEntry>& used_samplers) { |
| 29 | if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { | 29 | if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { |
| 30 | return; | 30 | return; |
| 31 | } | 31 | } |
| @@ -43,9 +43,9 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, | |||
| 43 | } | 43 | } |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | 46 | std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, |
| 47 | VideoCore::GuestDriverProfile& gpu_driver, | 47 | VideoCore::GuestDriverProfile& gpu_driver, |
| 48 | const std::list<Sampler>& used_samplers) { | 48 | const std::list<SamplerEntry>& used_samplers) { |
| 49 | const u32 base_offset = sampler_to_deduce.offset; | 49 | const u32 base_offset = sampler_to_deduce.offset; |
| 50 | u32 max_offset{std::numeric_limits<u32>::max()}; | 50 | u32 max_offset{std::numeric_limits<u32>::max()}; |
| 51 | for (const auto& sampler : used_samplers) { | 51 | for (const auto& sampler : used_samplers) { |
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index b2e88fa20..fa83108cd 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp | |||
| @@ -22,13 +22,13 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { | |||
| 22 | const Instruction instr = {program_code[pc]}; | 22 | const Instruction instr = {program_code[pc]}; |
| 23 | const auto opcode = OpCode::Decode(instr); | 23 | const auto opcode = OpCode::Decode(instr); |
| 24 | 24 | ||
| 25 | PredCondition cond; | 25 | PredCondition cond{}; |
| 26 | bool bf; | 26 | bool bf = false; |
| 27 | bool ftz; | 27 | bool ftz = false; |
| 28 | bool neg_a; | 28 | bool neg_a = false; |
| 29 | bool abs_a; | 29 | bool abs_a = false; |
| 30 | bool neg_b; | 30 | bool neg_b = false; |
| 31 | bool abs_b; | 31 | bool abs_b = false; |
| 32 | switch (opcode->get().GetId()) { | 32 | switch (opcode->get().GetId()) { |
| 33 | case OpCode::Id::HSET2_C: | 33 | case OpCode::Id::HSET2_C: |
| 34 | case OpCode::Id::HSET2_IMM: | 34 | case OpCode::Id::HSET2_IMM: |
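The half_set.cpp hunk zero-initializes locals whose only assignments sit inside the following switch. When a compiler cannot prove every path writes them, warnings like GCC's -Wuninitialized reject the old code once warnings are errors, and the initializers also yield deterministic values if an unexpected opcode slips through. A distilled version of the situation (names are illustrative):

    // Sketch: without the up-front initializer, an unhandled opcode would leave
    // flag uninitialized at the final read.
    int DecodeFlag(int opcode) {
        bool flag = false;  // was `bool flag;` with switch-only assignments
        switch (opcode) {
        case 0:
            flag = true;
            break;
        case 1:
            flag = false;
            break;
        default:
            break;  // previously left flag unwritten on this path
        }
        return flag ? 1 : 0;
    }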
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 532f66d27..5470e8cf4 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp | |||
| @@ -497,11 +497,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 497 | return pc; | 497 | return pc; |
| 498 | } | 498 | } |
| 499 | 499 | ||
| 500 | Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { | 500 | ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { |
| 501 | const auto offset = static_cast<u32>(image.index.Value()); | 501 | const auto offset = static_cast<u32>(image.index.Value()); |
| 502 | 502 | ||
| 503 | const auto it = std::find_if(std::begin(used_images), std::end(used_images), | 503 | const auto it = |
| 504 | [offset](const Image& entry) { return entry.offset == offset; }); | 504 | std::find_if(std::begin(used_images), std::end(used_images), |
| 505 | [offset](const ImageEntry& entry) { return entry.offset == offset; }); | ||
| 505 | if (it != std::end(used_images)) { | 506 | if (it != std::end(used_images)) { |
| 506 | ASSERT(!it->is_bindless && it->type == type); | 507 | ASSERT(!it->is_bindless && it->type == type); |
| 507 | return *it; | 508 | return *it; |
| @@ -511,7 +512,7 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t | |||
| 511 | return used_images.emplace_back(next_index, offset, type); | 512 | return used_images.emplace_back(next_index, offset, type); |
| 512 | } | 513 | } |
| 513 | 514 | ||
| 514 | Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { | 515 | ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { |
| 515 | const Node image_register = GetRegister(reg); | 516 | const Node image_register = GetRegister(reg); |
| 516 | const auto result = | 517 | const auto result = |
| 517 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); | 518 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); |
| @@ -520,7 +521,7 @@ Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::Im | |||
| 520 | const auto offset = std::get<2>(result); | 521 | const auto offset = std::get<2>(result); |
| 521 | 522 | ||
| 522 | const auto it = std::find_if(std::begin(used_images), std::end(used_images), | 523 | const auto it = std::find_if(std::begin(used_images), std::end(used_images), |
| 523 | [buffer, offset](const Image& entry) { | 524 | [buffer, offset](const ImageEntry& entry) { |
| 524 | return entry.buffer == buffer && entry.offset == offset; | 525 | return entry.buffer == buffer && entry.offset == offset; |
| 525 | }); | 526 | }); |
| 526 | if (it != std::end(used_images)) { | 527 | if (it != std::end(used_images)) { |
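Both lookups follow the same find-or-create shape: linear-search `used_images` by key and, on a miss, emplace a fresh entry carrying the next sequential index. `std::list` matters here because `MetaImage` holds a reference into the container, and list insertion never invalidates existing references. A stripped-down sketch of the pattern (simplified `Entry`, illustrative names):

```cpp
#include <algorithm>
#include <cstdint>
#include <list>

struct Entry {
    Entry(uint32_t index_, uint32_t offset_) : index{index_}, offset{offset_} {}
    uint32_t index;
    uint32_t offset;
};

// Find-or-create keyed on offset; references returned here stay valid
// across later emplace_back calls because the container is a std::list.
Entry& GetEntry(std::list<Entry>& used, uint32_t offset) {
    const auto it = std::find_if(used.begin(), used.end(),
                                 [offset](const Entry& e) { return e.offset == offset; });
    if (it != used.end()) {
        return *it;
    }
    const auto next_index = static_cast<uint32_t>(used.size());
    return used.emplace_back(next_index, offset);
}
```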
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index fb18f631f..833fa2a39 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 141 | 141 | ||
| 142 | SamplerInfo info; | 142 | SamplerInfo info; |
| 143 | info.is_shadow = is_depth_compare; | 143 | info.is_shadow = is_depth_compare; |
| 144 | const std::optional<Sampler> sampler = GetSampler(instr.sampler, info); | 144 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); |
| 145 | 145 | ||
| 146 | Node4 values; | 146 | Node4 values; |
| 147 | for (u32 element = 0; element < values.size(); ++element) { | 147 | for (u32 element = 0; element < values.size(); ++element) { |
| @@ -173,9 +173,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 173 | SamplerInfo info; | 173 | SamplerInfo info; |
| 174 | info.type = texture_type; | 174 | info.type = texture_type; |
| 175 | info.is_array = is_array; | 175 | info.is_array = is_array; |
| 176 | const std::optional<Sampler> sampler = is_bindless | 176 | const std::optional<SamplerEntry> sampler = |
| 177 | ? GetBindlessSampler(base_reg, info, index_var) | 177 | is_bindless ? GetBindlessSampler(base_reg, info, index_var) |
| 178 | : GetSampler(instr.sampler, info); | 178 | : GetSampler(instr.sampler, info); |
| 179 | Node4 values; | 179 | Node4 values; |
| 180 | if (!sampler) { | 180 | if (!sampler) { |
| 181 | std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); | 181 | std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); |
| @@ -217,9 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 217 | [[fallthrough]]; | 217 | [[fallthrough]]; |
| 218 | case OpCode::Id::TXQ: { | 218 | case OpCode::Id::TXQ: { |
| 219 | Node index_var; | 219 | Node index_var; |
| 220 | const std::optional<Sampler> sampler = is_bindless | 220 | const std::optional<SamplerEntry> sampler = |
| 221 | ? GetBindlessSampler(instr.gpr8, {}, index_var) | 221 | is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) |
| 222 | : GetSampler(instr.sampler, {}); | 222 | : GetSampler(instr.sampler, {}); |
| 223 | 223 | ||
| 224 | if (!sampler) { | 224 | if (!sampler) { |
| 225 | u32 indexer = 0; | 225 | u32 indexer = 0; |
| @@ -272,7 +272,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 272 | info.type = texture_type; | 272 | info.type = texture_type; |
| 273 | info.is_array = is_array; | 273 | info.is_array = is_array; |
| 274 | Node index_var; | 274 | Node index_var; |
| 275 | const std::optional<Sampler> sampler = | 275 | const std::optional<SamplerEntry> sampler = |
| 276 | is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) | 276 | is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) |
| 277 | : GetSampler(instr.sampler, info); | 277 | : GetSampler(instr.sampler, info); |
| 278 | 278 | ||
| @@ -379,14 +379,15 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( | |||
| 379 | return info; | 379 | return info; |
| 380 | } | 380 | } |
| 381 | 381 | ||
| 382 | std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, | 382 | std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, |
| 383 | SamplerInfo sampler_info) { | 383 | SamplerInfo sampler_info) { |
| 384 | const u32 offset = static_cast<u32>(sampler.index.Value()); | 384 | const u32 offset = static_cast<u32>(sampler.index.Value()); |
| 385 | const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); | 385 | const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); |
| 386 | 386 | ||
| 387 | // If this sampler has already been used, return the existing mapping. | 387 | // If this sampler has already been used, return the existing mapping. |
| 388 | const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), | 388 | const auto it = |
| 389 | [offset](const Sampler& entry) { return entry.offset == offset; }); | 389 | std::find_if(used_samplers.begin(), used_samplers.end(), |
| 390 | [offset](const SamplerEntry& entry) { return entry.offset == offset; }); | ||
| 390 | if (it != used_samplers.end()) { | 391 | if (it != used_samplers.end()) { |
| 391 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && | 392 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && |
| 392 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); | 393 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); |
| @@ -399,8 +400,8 @@ std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, | |||
| 399 | *info.is_shadow, *info.is_buffer, false); | 400 | *info.is_shadow, *info.is_buffer, false); |
| 400 | } | 401 | } |
| 401 | 402 | ||
| 402 | std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, | 403 | std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, |
| 403 | Node& index_var) { | 404 | SamplerInfo info, Node& index_var) { |
| 404 | const Node sampler_register = GetRegister(reg); | 405 | const Node sampler_register = GetRegister(reg); |
| 405 | const auto [base_node, tracked_sampler_info] = | 406 | const auto [base_node, tracked_sampler_info] = |
| 406 | TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); | 407 | TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); |
| @@ -416,7 +417,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, | |||
| 416 | 417 | ||
| 417 | // If this sampler has already been used, return the existing mapping. | 418 | // If this sampler has already been used, return the existing mapping. |
| 418 | const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), | 419 | const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), |
| 419 | [buffer, offset](const Sampler& entry) { | 420 | [buffer, offset](const SamplerEntry& entry) { |
| 420 | return entry.buffer == buffer && entry.offset == offset; | 421 | return entry.buffer == buffer && entry.offset == offset; |
| 421 | }); | 422 | }); |
| 422 | if (it != used_samplers.end()) { | 423 | if (it != used_samplers.end()) { |
| @@ -436,11 +437,12 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, | |||
| 436 | info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); | 437 | info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); |
| 437 | 438 | ||
| 438 | // Try to use an already created sampler if it exists | 439 | // Try to use an already created sampler if it exists |
| 439 | const auto it = std::find_if( | 440 | const auto it = |
| 440 | used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) { | 441 | std::find_if(used_samplers.begin(), used_samplers.end(), |
| 441 | return offsets == std::pair{entry.offset, entry.secondary_offset} && | 442 | [indices, offsets](const SamplerEntry& entry) { |
| 442 | indices == std::pair{entry.buffer, entry.secondary_buffer}; | 443 | return offsets == std::pair{entry.offset, entry.secondary_offset} && |
| 443 | }); | 444 | indices == std::pair{entry.buffer, entry.secondary_buffer}; |
| 445 | }); | ||
| 444 | if (it != used_samplers.end()) { | 446 | if (it != used_samplers.end()) { |
| 445 | ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && | 447 | ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && |
| 446 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); | 448 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); |
| @@ -460,7 +462,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, | |||
| 460 | // If this sampler has already been used, return the existing mapping. | 462 | // If this sampler has already been used, return the existing mapping. |
| 461 | const auto it = std::find_if( | 463 | const auto it = std::find_if( |
| 462 | used_samplers.begin(), used_samplers.end(), | 464 | used_samplers.begin(), used_samplers.end(), |
| 463 | [base_offset](const Sampler& entry) { return entry.offset == base_offset; }); | 465 | [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); |
| 464 | if (it != used_samplers.end()) { | 466 | if (it != used_samplers.end()) { |
| 465 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && | 467 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && |
| 466 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && | 468 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && |
| @@ -565,9 +567,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 565 | info.is_buffer = false; | 567 | info.is_buffer = false; |
| 566 | 568 | ||
| 567 | Node index_var; | 569 | Node index_var; |
| 568 | const std::optional<Sampler> sampler = is_bindless | 570 | const std::optional<SamplerEntry> sampler = |
| 569 | ? GetBindlessSampler(*bindless_reg, info, index_var) | 571 | is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) |
| 570 | : GetSampler(instr.sampler, info); | 572 | : GetSampler(instr.sampler, info); |
| 571 | if (!sampler) { | 573 | if (!sampler) { |
| 572 | return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; | 574 | return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; |
| 573 | } | 575 | } |
| @@ -724,7 +726,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 724 | info.is_shadow = depth_compare; | 726 | info.is_shadow = depth_compare; |
| 725 | 727 | ||
| 726 | Node index_var; | 728 | Node index_var; |
| 727 | const std::optional<Sampler> sampler = | 729 | const std::optional<SamplerEntry> sampler = |
| 728 | is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) | 730 | is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) |
| 729 | : GetSampler(instr.sampler, info); | 731 | : GetSampler(instr.sampler, info); |
| 730 | Node4 values; | 732 | Node4 values; |
| @@ -783,7 +785,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | |||
| 783 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; | 785 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; |
| 784 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; | 786 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; |
| 785 | 787 | ||
| 786 | const std::optional<Sampler> sampler = GetSampler(instr.sampler, {}); | 788 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {}); |
| 787 | 789 | ||
| 788 | Node4 values; | 790 | Node4 values; |
| 789 | for (u32 element = 0; element < values.size(); ++element) { | 791 | for (u32 element = 0; element < values.size(); ++element) { |
| @@ -800,7 +802,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 800 | info.type = texture_type; | 802 | info.type = texture_type; |
| 801 | info.is_array = is_array; | 803 | info.is_array = is_array; |
| 802 | info.is_shadow = false; | 804 | info.is_shadow = false; |
| 803 | const std::optional<Sampler> sampler = GetSampler(instr.sampler, info); | 805 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); |
| 804 | 806 | ||
| 805 | const std::size_t type_coord_count = GetCoordCount(texture_type); | 807 | const std::size_t type_coord_count = GetCoordCount(texture_type); |
| 806 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | 808 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; |
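Throughout the decoder above, sampler lookups yield `std::optional<SamplerEntry>`, and every call site degrades to zero-filled results when bindless tracking fails instead of dereferencing a bad entry. A reduced sketch of that control flow (an `int` stands in for the entry, `Node4` for the four result nodes):

```cpp
#include <array>
#include <optional>

using Node4 = std::array<int, 4>;

// Stand-in for GetSampler/GetBindlessSampler: empty when tracking fails.
std::optional<int> LookUpSampler(bool trackable) {
    return trackable ? std::optional<int>{7} : std::nullopt;
}

Node4 Sample(bool trackable) {
    const std::optional<int> sampler = LookUpSampler(trackable);
    Node4 values;
    if (!sampler) {
        values.fill(0);  // mirrors std::generate(..., Immediate(0)): emit
                         // zeros rather than crash on an untracked sampler
        return values;
    }
    values.fill(*sampler);
    return values;
}
```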
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 8db9e1de7..b54d33763 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -282,25 +282,24 @@ struct SeparateSamplerNode; | |||
| 282 | using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; | 282 | using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; |
| 283 | using TrackSampler = std::shared_ptr<TrackSamplerData>; | 283 | using TrackSampler = std::shared_ptr<TrackSamplerData>; |
| 284 | 284 | ||
| 285 | struct Sampler { | 285 | struct SamplerEntry { |
| 286 | /// Bound samplers constructor | 286 | /// Bound samplers constructor |
| 287 | constexpr explicit Sampler(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, | 287 | explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, |
| 288 | bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) | 288 | bool is_shadow_, bool is_buffer_, bool is_indexed_) |
| 289 | : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, | 289 | : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, |
| 290 | is_buffer{is_buffer_}, is_indexed{is_indexed_} {} | 290 | is_buffer{is_buffer_}, is_indexed{is_indexed_} {} |
| 291 | 291 | ||
| 292 | /// Separate sampler constructor | 292 | /// Separate sampler constructor |
| 293 | constexpr explicit Sampler(u32 index_, std::pair<u32, u32> offsets_, | 293 | explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, |
| 294 | std::pair<u32, u32> buffers_, Tegra::Shader::TextureType type_, | 294 | Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, |
| 295 | bool is_array_, bool is_shadow_, bool is_buffer_) | 295 | bool is_buffer_) |
| 296 | : index{index_}, offset{offsets_.first}, secondary_offset{offsets_.second}, | 296 | : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, |
| 297 | buffer{buffers_.first}, secondary_buffer{buffers_.second}, type{type_}, | 297 | buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, |
| 298 | is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} | 298 | is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} |
| 299 | 299 | ||
| 300 | /// Bindless samplers constructor | 300 | /// Bindless samplers constructor |
| 301 | constexpr explicit Sampler(u32 index_, u32 offset_, u32 buffer_, | 301 | explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, |
| 302 | Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, | 302 | bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) |
| 303 | bool is_buffer_, bool is_indexed_) | ||
| 304 | : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, | 303 | : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, |
| 305 | is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { | 304 | is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { |
| 306 | } | 305 | } |
| @@ -340,14 +339,14 @@ struct BindlessSamplerNode { | |||
| 340 | u32 offset; | 339 | u32 offset; |
| 341 | }; | 340 | }; |
| 342 | 341 | ||
| 343 | struct Image { | 342 | struct ImageEntry { |
| 344 | public: | 343 | public: |
| 345 | /// Bound images constructor | 344 | /// Bound images constructor |
| 346 | constexpr explicit Image(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) | 345 | explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) |
| 347 | : index{index_}, offset{offset_}, type{type_} {} | 346 | : index{index_}, offset{offset_}, type{type_} {} |
| 348 | 347 | ||
| 349 | /// Bindless images constructor | 348 | /// Bindless images constructor |
| 350 | constexpr explicit Image(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) | 349 | explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) |
| 351 | : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} | 350 | : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} |
| 352 | 351 | ||
| 353 | void MarkWrite() { | 352 | void MarkWrite() { |
| @@ -391,7 +390,7 @@ struct MetaArithmetic { | |||
| 391 | 390 | ||
| 392 | /// Parameters describing a texture sampler | 391 | /// Parameters describing a texture sampler |
| 393 | struct MetaTexture { | 392 | struct MetaTexture { |
| 394 | Sampler sampler; | 393 | SamplerEntry sampler; |
| 395 | Node array; | 394 | Node array; |
| 396 | Node depth_compare; | 395 | Node depth_compare; |
| 397 | std::vector<Node> aoffi; | 396 | std::vector<Node> aoffi; |
| @@ -405,7 +404,7 @@ struct MetaTexture { | |||
| 405 | }; | 404 | }; |
| 406 | 405 | ||
| 407 | struct MetaImage { | 406 | struct MetaImage { |
| 408 | const Image& image; | 407 | const ImageEntry& image; |
| 409 | std::vector<Node> values; | 408 | std::vector<Node> values; |
| 410 | u32 element{}; | 409 | u32 element{}; |
| 411 | }; | 410 | }; |
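Overload resolution is what distinguishes the `SamplerEntry` flavours: bound entries carry only an offset, bindless ones add the constant buffer that held the handle, and separated ones take offset/buffer pairs. A compilable miniature of the first two (types and fields trimmed; not the full struct):

```cpp
#include <cstdint>

enum class TextureType { Texture1D, Texture2D };  // stand-in for Tegra::Shader::TextureType

struct SamplerEntry {
    // Bound sampler: located by offset alone.
    explicit SamplerEntry(uint32_t index_, uint32_t offset_, TextureType type_)
        : index{index_}, offset{offset_}, type{type_} {}

    // Bindless sampler: also records the constant buffer holding the handle.
    explicit SamplerEntry(uint32_t index_, uint32_t offset_, uint32_t buffer_, TextureType type_)
        : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {}

    uint32_t index;
    uint32_t offset;
    uint32_t buffer = 0;
    TextureType type;
    bool is_bindless = false;
};

const SamplerEntry bound{0, 0x8, TextureType::Texture2D};
const SamplerEntry bindless{1, 0x40, 2, TextureType::Texture2D};
```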
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 6aae14e34..0c6ab0f07 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -94,11 +94,11 @@ public: | |||
| 94 | return used_cbufs; | 94 | return used_cbufs; |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | const std::list<Sampler>& GetSamplers() const { | 97 | const std::list<SamplerEntry>& GetSamplers() const { |
| 98 | return used_samplers; | 98 | return used_samplers; |
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | const std::list<Image>& GetImages() const { | 101 | const std::list<ImageEntry>& GetImages() const { |
| 102 | return used_images; | 102 | return used_images; |
| 103 | } | 103 | } |
| 104 | 104 | ||
| @@ -334,17 +334,17 @@ private: | |||
| 334 | std::optional<Tegra::Engines::SamplerDescriptor> sampler); | 334 | std::optional<Tegra::Engines::SamplerDescriptor> sampler); |
| 335 | 335 | ||
| 336 | /// Accesses a texture sampler. | 336 | /// Accesses a texture sampler. |
| 337 | std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); | 337 | std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); |
| 338 | 338 | ||
| 339 | /// Accesses a texture sampler for a bindless texture. | 339 | /// Accesses a texture sampler for a bindless texture. |
| 340 | std::optional<Sampler> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, | 340 | std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, |
| 341 | Node& index_var); | 341 | Node& index_var); |
| 342 | 342 | ||
| 343 | /// Accesses an image. | 343 | /// Accesses an image. |
| 344 | Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); | 344 | ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); |
| 345 | 345 | ||
| 346 | /// Accesses a bindless image. | 346 | /// Accesses a bindless image. |
| 347 | Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); | 347 | ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); |
| 348 | 348 | ||
| 349 | /// Extracts a sequence of bits from a node | 349 | /// Extracts a sequence of bits from a node |
| 350 | Node BitfieldExtract(Node value, u32 offset, u32 bits); | 350 | Node BitfieldExtract(Node value, u32 offset, u32 bits); |
| @@ -454,8 +454,8 @@ private: | |||
| 454 | std::set<Tegra::Shader::Attribute::Index> used_input_attributes; | 454 | std::set<Tegra::Shader::Attribute::Index> used_input_attributes; |
| 455 | std::set<Tegra::Shader::Attribute::Index> used_output_attributes; | 455 | std::set<Tegra::Shader::Attribute::Index> used_output_attributes; |
| 456 | std::map<u32, ConstBuffer> used_cbufs; | 456 | std::map<u32, ConstBuffer> used_cbufs; |
| 457 | std::list<Sampler> used_samplers; | 457 | std::list<SamplerEntry> used_samplers; |
| 458 | std::list<Image> used_images; | 458 | std::list<ImageEntry> used_images; |
| 459 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; | 459 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; |
| 460 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; | 460 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; |
| 461 | bool uses_layer{}; | 461 | bool uses_layer{}; |
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 937e29d1e..6308aef94 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -280,7 +280,7 @@ bool IsPixelFormatSRGB(PixelFormat format) { | |||
| 280 | } | 280 | } |
| 281 | 281 | ||
| 282 | std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { | 282 | std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { |
| 283 | return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; | 283 | return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; |
| 284 | } | 284 | } |
| 285 | 285 | ||
| 286 | } // namespace VideoCore::Surface | 286 | } // namespace VideoCore::Surface |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index cfd12fa61..c40ab89d0 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -120,7 +120,7 @@ enum class PixelFormat { | |||
| 120 | Max = MaxDepthStencilFormat, | 120 | Max = MaxDepthStencilFormat, |
| 121 | Invalid = 255, | 121 | Invalid = 255, |
| 122 | }; | 122 | }; |
| 123 | static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max); | 123 | constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max); |
| 124 | 124 | ||
| 125 | enum class SurfaceType { | 125 | enum class SurfaceType { |
| 126 | ColorTexture = 0, | 126 | ColorTexture = 0, |
| @@ -140,117 +140,7 @@ enum class SurfaceTarget { | |||
| 140 | TextureCubeArray, | 140 | TextureCubeArray, |
| 141 | }; | 141 | }; |
| 142 | 142 | ||
| 143 | constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ | 143 | constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{ |
| 144 | 0, // A8B8G8R8_UNORM | ||
| 145 | 0, // A8B8G8R8_SNORM | ||
| 146 | 0, // A8B8G8R8_SINT | ||
| 147 | 0, // A8B8G8R8_UINT | ||
| 148 | 0, // R5G6B5_UNORM | ||
| 149 | 0, // B5G6R5_UNORM | ||
| 150 | 0, // A1R5G5B5_UNORM | ||
| 151 | 0, // A2B10G10R10_UNORM | ||
| 152 | 0, // A2B10G10R10_UINT | ||
| 153 | 0, // A1B5G5R5_UNORM | ||
| 154 | 0, // R8_UNORM | ||
| 155 | 0, // R8_SNORM | ||
| 156 | 0, // R8_SINT | ||
| 157 | 0, // R8_UINT | ||
| 158 | 0, // R16G16B16A16_FLOAT | ||
| 159 | 0, // R16G16B16A16_UNORM | ||
| 160 | 0, // R16G16B16A16_SNORM | ||
| 161 | 0, // R16G16B16A16_SINT | ||
| 162 | 0, // R16G16B16A16_UINT | ||
| 163 | 0, // B10G11R11_FLOAT | ||
| 164 | 0, // R32G32B32A32_UINT | ||
| 165 | 2, // BC1_RGBA_UNORM | ||
| 166 | 2, // BC2_UNORM | ||
| 167 | 2, // BC3_UNORM | ||
| 168 | 2, // BC4_UNORM | ||
| 169 | 2, // BC4_SNORM | ||
| 170 | 2, // BC5_UNORM | ||
| 171 | 2, // BC5_SNORM | ||
| 172 | 2, // BC7_UNORM | ||
| 173 | 2, // BC6H_UFLOAT | ||
| 174 | 2, // BC6H_SFLOAT | ||
| 175 | 2, // ASTC_2D_4X4_UNORM | ||
| 176 | 0, // B8G8R8A8_UNORM | ||
| 177 | 0, // R32G32B32A32_FLOAT | ||
| 178 | 0, // R32G32B32A32_SINT | ||
| 179 | 0, // R32G32_FLOAT | ||
| 180 | 0, // R32G32_SINT | ||
| 181 | 0, // R32_FLOAT | ||
| 182 | 0, // R16_FLOAT | ||
| 183 | 0, // R16_UNORM | ||
| 184 | 0, // R16_SNORM | ||
| 185 | 0, // R16_UINT | ||
| 186 | 0, // R16_SINT | ||
| 187 | 0, // R16G16_UNORM | ||
| 188 | 0, // R16G16_FLOAT | ||
| 189 | 0, // R16G16_UINT | ||
| 190 | 0, // R16G16_SINT | ||
| 191 | 0, // R16G16_SNORM | ||
| 192 | 0, // R32G32B32_FLOAT | ||
| 193 | 0, // A8B8G8R8_SRGB | ||
| 194 | 0, // R8G8_UNORM | ||
| 195 | 0, // R8G8_SNORM | ||
| 196 | 0, // R8G8_SINT | ||
| 197 | 0, // R8G8_UINT | ||
| 198 | 0, // R32G32_UINT | ||
| 199 | 0, // R16G16B16X16_FLOAT | ||
| 200 | 0, // R32_UINT | ||
| 201 | 0, // R32_SINT | ||
| 202 | 2, // ASTC_2D_8X8_UNORM | ||
| 203 | 2, // ASTC_2D_8X5_UNORM | ||
| 204 | 2, // ASTC_2D_5X4_UNORM | ||
| 205 | 0, // B8G8R8A8_SRGB | ||
| 206 | 2, // BC1_RGBA_SRGB | ||
| 207 | 2, // BC2_SRGB | ||
| 208 | 2, // BC3_SRGB | ||
| 209 | 2, // BC7_SRGB | ||
| 210 | 0, // A4B4G4R4_UNORM | ||
| 211 | 2, // ASTC_2D_4X4_SRGB | ||
| 212 | 2, // ASTC_2D_8X8_SRGB | ||
| 213 | 2, // ASTC_2D_8X5_SRGB | ||
| 214 | 2, // ASTC_2D_5X4_SRGB | ||
| 215 | 2, // ASTC_2D_5X5_UNORM | ||
| 216 | 2, // ASTC_2D_5X5_SRGB | ||
| 217 | 2, // ASTC_2D_10X8_UNORM | ||
| 218 | 2, // ASTC_2D_10X8_SRGB | ||
| 219 | 2, // ASTC_2D_6X6_UNORM | ||
| 220 | 2, // ASTC_2D_6X6_SRGB | ||
| 221 | 2, // ASTC_2D_10X10_UNORM | ||
| 222 | 2, // ASTC_2D_10X10_SRGB | ||
| 223 | 2, // ASTC_2D_12X12_UNORM | ||
| 224 | 2, // ASTC_2D_12X12_SRGB | ||
| 225 | 2, // ASTC_2D_8X6_UNORM | ||
| 226 | 2, // ASTC_2D_8X6_SRGB | ||
| 227 | 2, // ASTC_2D_6X5_UNORM | ||
| 228 | 2, // ASTC_2D_6X5_SRGB | ||
| 229 | 0, // E5B9G9R9_FLOAT | ||
| 230 | 0, // D32_FLOAT | ||
| 231 | 0, // D16_UNORM | ||
| 232 | 0, // D24_UNORM_S8_UINT | ||
| 233 | 0, // S8_UINT_D24_UNORM | ||
| 234 | 0, // D32_FLOAT_S8_UINT | ||
| 235 | }}; | ||
| 236 | |||
| 237 | /** | ||
| 238 | * Gets the compression factor for the specified PixelFormat. This applies to just the | ||
| 239 | * "compressed width" and "compressed height", not the overall compression factor of a | ||
| 240 | * compressed image. This is used for maintaining proper surface sizes for compressed | ||
| 241 | * texture formats. | ||
| 242 | */ | ||
| 243 | inline constexpr u32 GetCompressionFactorShift(PixelFormat format) { | ||
| 244 | DEBUG_ASSERT(format != PixelFormat::Invalid); | ||
| 245 | DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_factor_shift_table.size()); | ||
| 246 | return compression_factor_shift_table[static_cast<std::size_t>(format)]; | ||
| 247 | } | ||
| 248 | |||
| 249 | inline constexpr u32 GetCompressionFactor(PixelFormat format) { | ||
| 250 | return 1U << GetCompressionFactorShift(format); | ||
| 251 | } | ||
| 252 | |||
| 253 | constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ | ||
| 254 | 1, // A8B8G8R8_UNORM | 144 | 1, // A8B8G8R8_UNORM |
| 255 | 1, // A8B8G8R8_SNORM | 145 | 1, // A8B8G8R8_SNORM |
| 256 | 1, // A8B8G8R8_SINT | 146 | 1, // A8B8G8R8_SINT |
| @@ -344,15 +234,12 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ | |||
| 344 | 1, // D32_FLOAT_S8_UINT | 234 | 1, // D32_FLOAT_S8_UINT |
| 345 | }}; | 235 | }}; |
| 346 | 236 | ||
| 347 | static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { | 237 | constexpr u32 DefaultBlockWidth(PixelFormat format) { |
| 348 | if (format == PixelFormat::Invalid) | 238 | ASSERT(static_cast<std::size_t>(format) < BLOCK_WIDTH_TABLE.size()); |
| 349 | return 0; | 239 | return BLOCK_WIDTH_TABLE[static_cast<std::size_t>(format)]; |
| 350 | |||
| 351 | ASSERT(static_cast<std::size_t>(format) < block_width_table.size()); | ||
| 352 | return block_width_table[static_cast<std::size_t>(format)]; | ||
| 353 | } | 240 | } |
| 354 | 241 | ||
| 355 | constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ | 242 | constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{ |
| 356 | 1, // A8B8G8R8_UNORM | 243 | 1, // A8B8G8R8_UNORM |
| 357 | 1, // A8B8G8R8_SNORM | 244 | 1, // A8B8G8R8_SNORM |
| 358 | 1, // A8B8G8R8_SINT | 245 | 1, // A8B8G8R8_SINT |
| @@ -446,15 +333,12 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ | |||
| 446 | 1, // D32_FLOAT_S8_UINT | 333 | 1, // D32_FLOAT_S8_UINT |
| 447 | }}; | 334 | }}; |
| 448 | 335 | ||
| 449 | static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { | 336 | constexpr u32 DefaultBlockHeight(PixelFormat format) { |
| 450 | if (format == PixelFormat::Invalid) | 337 | ASSERT(static_cast<std::size_t>(format) < BLOCK_HEIGHT_TABLE.size()); |
| 451 | return 0; | 338 | return BLOCK_HEIGHT_TABLE[static_cast<std::size_t>(format)]; |
| 452 | |||
| 453 | ASSERT(static_cast<std::size_t>(format) < block_height_table.size()); | ||
| 454 | return block_height_table[static_cast<std::size_t>(format)]; | ||
| 455 | } | 339 | } |
| 456 | 340 | ||
| 457 | constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ | 341 | constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{ |
| 458 | 32, // A8B8G8R8_UNORM | 342 | 32, // A8B8G8R8_UNORM |
| 459 | 32, // A8B8G8R8_SNORM | 343 | 32, // A8B8G8R8_SNORM |
| 460 | 32, // A8B8G8R8_SINT | 344 | 32, // A8B8G8R8_SINT |
| @@ -548,20 +432,14 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ | |||
| 548 | 64, // D32_FLOAT_S8_UINT | 432 | 64, // D32_FLOAT_S8_UINT |
| 549 | }}; | 433 | }}; |
| 550 | 434 | ||
| 551 | static constexpr u32 GetFormatBpp(PixelFormat format) { | 435 | constexpr u32 BitsPerBlock(PixelFormat format) { |
| 552 | if (format == PixelFormat::Invalid) | 436 | ASSERT(static_cast<std::size_t>(format) < BITS_PER_BLOCK_TABLE.size()); |
| 553 | return 0; | 437 | return BITS_PER_BLOCK_TABLE[static_cast<std::size_t>(format)]; |
| 554 | |||
| 555 | ASSERT(static_cast<std::size_t>(format) < bpp_table.size()); | ||
| 556 | return bpp_table[static_cast<std::size_t>(format)]; | ||
| 557 | } | 438 | } |
| 558 | 439 | ||
| 559 | /// Returns the size in bytes of the specified pixel format | 440 | /// Returns the size in bytes of the specified pixel format |
| 560 | static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { | 441 | constexpr u32 BytesPerBlock(PixelFormat pixel_format) { |
| 561 | if (pixel_format == PixelFormat::Invalid) { | 442 | return BitsPerBlock(pixel_format) / CHAR_BIT; |
| 562 | return 0; | ||
| 563 | } | ||
| 564 | return GetFormatBpp(pixel_format) / CHAR_BIT; | ||
| 565 | } | 443 | } |
| 566 | 444 | ||
| 567 | SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); | 445 | SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); |
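The renames make the units explicit: the table stores bits per block, a block being a single pixel for uncompressed formats and a compression tile for BC/ASTC, and `BytesPerBlock` just divides by `CHAR_BIT`. A couple of spot checks of that arithmetic (the bit widths below are the well-known format sizes, not read from the elided table):

```cpp
#include <climits>
#include <cstdint>

constexpr uint32_t BytesPerBlock(uint32_t bits_per_block) {
    return bits_per_block / CHAR_BIT;  // same arithmetic as the diff's helper
}

static_assert(BytesPerBlock(32) == 4);    // A8B8G8R8_UNORM: one 4-byte pixel
static_assert(BytesPerBlock(64) == 8);    // D32_FLOAT_S8_UINT, or one BC1 4x4 tile
static_assert(BytesPerBlock(128) == 16);  // R32G32B32A32_FLOAT, or one ASTC tile
```

Note that unlike the old helpers, the rewritten ones no longer return 0 for `PixelFormat::Invalid`; the bounds `ASSERT` rejects it, so callers must filter out invalid formats beforehand.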
diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp new file mode 100644 index 000000000..a4fc1184b --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.cpp | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <bit> | ||
| 7 | |||
| 8 | #include "common/alignment.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "common/div_ceil.h" | ||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/texture_cache/accelerated_swizzle.h" | ||
| 13 | #include "video_core/texture_cache/util.h" | ||
| 14 | #include "video_core/textures/decoders.h" | ||
| 15 | |||
| 16 | namespace VideoCommon::Accelerated { | ||
| 17 | |||
| 18 | using Tegra::Texture::GOB_SIZE_SHIFT; | ||
| 19 | using Tegra::Texture::GOB_SIZE_X; | ||
| 20 | using Tegra::Texture::GOB_SIZE_X_SHIFT; | ||
| 21 | using Tegra::Texture::GOB_SIZE_Y_SHIFT; | ||
| 22 | using VideoCore::Surface::BytesPerBlock; | ||
| 23 | |||
| 24 | BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle, | ||
| 25 | const ImageInfo& info) { | ||
| 26 | const Extent3D block = swizzle.block; | ||
| 27 | const Extent3D num_tiles = swizzle.num_tiles; | ||
| 28 | const u32 bytes_per_block = BytesPerBlock(info.format); | ||
| 29 | const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); | ||
| 30 | const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; | ||
| 31 | const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); | ||
| 32 | return BlockLinearSwizzle2DParams{ | ||
| 33 | .origin{0, 0, 0}, | ||
| 34 | .destination{0, 0, 0}, | ||
| 35 | .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)), | ||
| 36 | .layer_stride = info.layer_stride, | ||
| 37 | .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth), | ||
| 38 | .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, | ||
| 39 | .block_height = block.height, | ||
| 40 | .block_height_mask = (1U << block.height) - 1, | ||
| 41 | }; | ||
| 42 | } | ||
| 43 | |||
| 44 | BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle, | ||
| 45 | const ImageInfo& info) { | ||
| 46 | const Extent3D block = swizzle.block; | ||
| 47 | const Extent3D num_tiles = swizzle.num_tiles; | ||
| 48 | const u32 bytes_per_block = BytesPerBlock(info.format); | ||
| 49 | const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); | ||
| 50 | const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; | ||
| 51 | |||
| 52 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT; | ||
| 53 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); | ||
| 54 | const u32 slice_size = | ||
| 55 | Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size; | ||
| 56 | return BlockLinearSwizzle3DParams{ | ||
| 57 | .origin{0, 0, 0}, | ||
| 58 | .destination{0, 0, 0}, | ||
| 59 | .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)), | ||
| 60 | .slice_size = slice_size, | ||
| 61 | .block_size = block_size, | ||
| 62 | .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, | ||
| 63 | .block_height = block.height, | ||
| 64 | .block_height_mask = (1U << block.height) - 1, | ||
| 65 | .block_depth = block.depth, | ||
| 66 | .block_depth_mask = (1U << block.depth) - 1, | ||
| 67 | }; | ||
| 68 | } | ||
| 69 | |||
| 70 | } // namespace VideoCommon::Accelerated \ No newline at end of file | ||
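The 2D and 3D paths compute the per-row GOB count two ways: `Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT)` and the spelled-out `(stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT`. Both are a ceiling division by the GOB width. A quick check of that equivalence, assuming the conventional 64-byte-wide GOB (a shift of 6) and that `DivCeilLog2` behaves as sketched here:

```cpp
#include <cstdint>

constexpr uint32_t GOB_SIZE_X = 64;       // assumed Tegra GOB width in bytes
constexpr uint32_t GOB_SIZE_X_SHIFT = 6;  // log2 of the above

// Sketch of what Common::DivCeilLog2 is expected to compute.
constexpr uint32_t DivCeilLog2(uint32_t value, uint32_t shift) {
    return (value + (1u << shift) - 1) >> shift;
}

// A 200-byte row needs four 64-byte GOBs either way it is written.
static_assert(DivCeilLog2(200, GOB_SIZE_X_SHIFT) == 4);
static_assert(((200 + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT) == 4);
```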
diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h new file mode 100644 index 000000000..6ec5c78c4 --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.h | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/texture_cache/image_info.h" | ||
| 11 | #include "video_core/texture_cache/types.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Accelerated { | ||
| 14 | |||
| 15 | struct BlockLinearSwizzle2DParams { | ||
| 16 | std::array<u32, 3> origin; | ||
| 17 | std::array<s32, 3> destination; | ||
| 18 | u32 bytes_per_block_log2; | ||
| 19 | u32 layer_stride; | ||
| 20 | u32 block_size; | ||
| 21 | u32 x_shift; | ||
| 22 | u32 block_height; | ||
| 23 | u32 block_height_mask; | ||
| 24 | }; | ||
| 25 | |||
| 26 | struct BlockLinearSwizzle3DParams { | ||
| 27 | std::array<u32, 3> origin; | ||
| 28 | std::array<s32, 3> destination; | ||
| 29 | u32 bytes_per_block_log2; | ||
| 30 | u32 slice_size; | ||
| 31 | u32 block_size; | ||
| 32 | u32 x_shift; | ||
| 33 | u32 block_height; | ||
| 34 | u32 block_height_mask; | ||
| 35 | u32 block_depth; | ||
| 36 | u32 block_depth_mask; | ||
| 37 | }; | ||
| 38 | |||
| 39 | [[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams( | ||
| 40 | const SwizzleParameters& swizzle, const ImageInfo& info); | ||
| 41 | |||
| 42 | [[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams( | ||
| 43 | const SwizzleParameters& swizzle, const ImageInfo& info); | ||
| 44 | |||
| 45 | } // namespace VideoCommon::Accelerated | ||
diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h deleted file mode 100644 index 5b475fe06..000000000 --- a/src/video_core/texture_cache/copy_params.h +++ /dev/null | |||
| @@ -1,36 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
| 11 | struct CopyParams { | ||
| 12 | constexpr CopyParams(u32 source_x_, u32 source_y_, u32 source_z_, u32 dest_x_, u32 dest_y_, | ||
| 13 | u32 dest_z_, u32 source_level_, u32 dest_level_, u32 width_, u32 height_, | ||
| 14 | u32 depth_) | ||
| 15 | : source_x{source_x_}, source_y{source_y_}, source_z{source_z_}, dest_x{dest_x_}, | ||
| 16 | dest_y{dest_y_}, dest_z{dest_z_}, source_level{source_level_}, | ||
| 17 | dest_level{dest_level_}, width{width_}, height{height_}, depth{depth_} {} | ||
| 18 | |||
| 19 | constexpr CopyParams(u32 width_, u32 height_, u32 depth_, u32 level_) | ||
| 20 | : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level_}, | ||
| 21 | dest_level{level_}, width{width_}, height{height_}, depth{depth_} {} | ||
| 22 | |||
| 23 | u32 source_x; | ||
| 24 | u32 source_y; | ||
| 25 | u32 source_z; | ||
| 26 | u32 dest_x; | ||
| 27 | u32 dest_y; | ||
| 28 | u32 dest_z; | ||
| 29 | u32 source_level; | ||
| 30 | u32 dest_level; | ||
| 31 | u32 width; | ||
| 32 | u32 height; | ||
| 33 | u32 depth; | ||
| 34 | }; | ||
| 35 | |||
| 36 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp new file mode 100644 index 000000000..017327975 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc4.cpp | |||
| @@ -0,0 +1,97 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <span> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/texture_cache/decode_bc4.h" | ||
| 12 | #include "video_core/texture_cache/types.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt | ||
| 17 | [[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) { | ||
| 18 | const u32 code_offset = 16 + 3 * (4 * y + x); | ||
| 19 | const u32 code = (bits >> code_offset) & 7; | ||
| 20 | const u32 red0 = (bits >> 0) & 0xff; | ||
| 21 | const u32 red1 = (bits >> 8) & 0xff; | ||
| 22 | if (red0 > red1) { | ||
| 23 | switch (code) { | ||
| 24 | case 0: | ||
| 25 | return red0; | ||
| 26 | case 1: | ||
| 27 | return red1; | ||
| 28 | case 2: | ||
| 29 | return (6 * red0 + 1 * red1) / 7; | ||
| 30 | case 3: | ||
| 31 | return (5 * red0 + 2 * red1) / 7; | ||
| 32 | case 4: | ||
| 33 | return (4 * red0 + 3 * red1) / 7; | ||
| 34 | case 5: | ||
| 35 | return (3 * red0 + 4 * red1) / 7; | ||
| 36 | case 6: | ||
| 37 | return (2 * red0 + 5 * red1) / 7; | ||
| 38 | case 7: | ||
| 39 | return (1 * red0 + 6 * red1) / 7; | ||
| 40 | } | ||
| 41 | } else { | ||
| 42 | switch (code) { | ||
| 43 | case 0: | ||
| 44 | return red0; | ||
| 45 | case 1: | ||
| 46 | return red1; | ||
| 47 | case 2: | ||
| 48 | return (4 * red0 + 1 * red1) / 5; | ||
| 49 | case 3: | ||
| 50 | return (3 * red0 + 2 * red1) / 5; | ||
| 51 | case 4: | ||
| 52 | return (2 * red0 + 3 * red1) / 5; | ||
| 53 | case 5: | ||
| 54 | return (1 * red0 + 4 * red1) / 5; | ||
| 55 | case 6: | ||
| 56 | return 0; | ||
| 57 | case 7: | ||
| 58 | return 0xff; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | return 0; | ||
| 62 | } | ||
| 63 | |||
| 64 | void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) { | ||
| 65 | UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width); | ||
| 66 | UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height); | ||
| 67 | static constexpr u32 BLOCK_SIZE = 4; | ||
| 68 | size_t input_offset = 0; | ||
| 69 | for (u32 slice = 0; slice < extent.depth; ++slice) { | ||
| 70 | for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) { | ||
| 71 | for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) { | ||
| 72 | u64 bits; | ||
| 73 | std::memcpy(&bits, &input[input_offset], sizeof(bits)); | ||
| 74 | input_offset += sizeof(bits); | ||
| 75 | |||
| 76 | for (u32 y = 0; y < BLOCK_SIZE; ++y) { | ||
| 77 | for (u32 x = 0; x < BLOCK_SIZE; ++x) { | ||
| 78 | const u32 linear_z = slice; | ||
| 79 | const u32 linear_y = block_y * BLOCK_SIZE + y; | ||
| 80 | const u32 linear_x = block_x * BLOCK_SIZE + x; | ||
| 81 | const u32 offset_z = linear_z * extent.width * extent.height; | ||
| 82 | const u32 offset_y = linear_y * extent.width; | ||
| 83 | const u32 offset_x = linear_x; | ||
| 84 | const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL; | ||
| 85 | const u32 color = DecompressBlock(bits, x, y); | ||
| 86 | output[output_offset + 0] = static_cast<u8>(color); | ||
| 87 | output[output_offset + 1] = 0; | ||
| 88 | output[output_offset + 2] = 0; | ||
| 89 | output[output_offset + 3] = 0xff; | ||
| 90 | } | ||
| 91 | } | ||
| 92 | } | ||
| 93 | } | ||
| 94 | } | ||
| 95 | } | ||
| 96 | |||
| 97 | } // namespace VideoCommon | ||
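A small driver for the decoder above: one 8-byte BC4 block whose sixteen 3-bit selector codes are all zero decodes every texel to `red0`, with green and blue cleared and alpha forced opaque. The include path and the `Extent3D` field order are taken from the diff; note the new .cpp itself calls `std::memcpy` without including `<cstring>`, so it leans on a transitive include.

```cpp
#include <array>
#include <cassert>
#include <cstdint>

#include "video_core/texture_cache/decode_bc4.h"  // DecompressBC4, path per the diff
#include "video_core/texture_cache/types.h"       // Extent3D

int main() {
    // One BC4 block: red0 = 200, red1 = 100, all selector bits zero,
    // so code 0 picks red0 for every texel.
    const std::array<uint8_t, 8> block{200, 100, 0, 0, 0, 0, 0, 0};
    std::array<uint8_t, 4 * 4 * 4> rgba{};  // 4x4 texels, RGBA8 each

    VideoCommon::DecompressBC4(block, {4, 4, 1}, rgba);

    assert(rgba[0] == 200);               // R = red0
    assert(rgba[1] == 0 && rgba[2] == 0); // G and B cleared
    assert(rgba[3] == 0xff);              // alpha forced opaque
    return 0;
}
```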
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h new file mode 100644 index 000000000..63fb23508 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc4.h | |||
| @@ -0,0 +1,16 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <span> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/texture_cache/types.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output); | ||
| 15 | |||
| 16 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h new file mode 100644 index 000000000..3a03b786f --- /dev/null +++ b/src/video_core/texture_cache/descriptor_table.h | |||
| @@ -0,0 +1,82 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/div_ceil.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/memory_manager.h" | ||
| 14 | #include "video_core/rasterizer_interface.h" | ||
| 15 | |||
| 16 | namespace VideoCommon { | ||
| 17 | |||
| 18 | template <typename Descriptor> | ||
| 19 | class DescriptorTable { | ||
| 20 | public: | ||
| 21 | explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {} | ||
| 22 | |||
| 23 | [[nodiscard]] bool Synchornize(GPUVAddr gpu_addr, u32 limit) { | ||
| 24 | [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { | ||
| 25 | return false; | ||
| 26 | } | ||
| 27 | Refresh(gpu_addr, limit); | ||
| 28 | return true; | ||
| 29 | } | ||
| 30 | |||
| 31 | void Invalidate() noexcept { | ||
| 32 | std::ranges::fill(read_descriptors, 0); | ||
| 33 | } | ||
| 34 | |||
| 35 | [[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) { | ||
| 36 | DEBUG_ASSERT(index <= current_limit); | ||
| 37 | const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor); | ||
| 38 | std::pair<Descriptor, bool> result; | ||
| 39 | gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor)); | ||
| 40 | if (IsDescriptorRead(index)) { | ||
| 41 | result.second = result.first != descriptors[index]; | ||
| 42 | } else { | ||
| 43 | MarkDescriptorAsRead(index); | ||
| 44 | result.second = true; | ||
| 45 | } | ||
| 46 | if (result.second) { | ||
| 47 | descriptors[index] = result.first; | ||
| 48 | } | ||
| 49 | return result; | ||
| 50 | } | ||
| 51 | |||
| 52 | [[nodiscard]] u32 Limit() const noexcept { | ||
| 53 | return current_limit; | ||
| 54 | } | ||
| 55 | |||
| 56 | private: | ||
| 57 | void Refresh(GPUVAddr gpu_addr, u32 limit) { | ||
| 58 | current_gpu_addr = gpu_addr; | ||
| 59 | current_limit = limit; | ||
| 60 | |||
| 61 | const size_t num_descriptors = static_cast<size_t>(limit) + 1; | ||
| 62 | read_descriptors.clear(); | ||
| 63 | read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0); | ||
| 64 | descriptors.resize(num_descriptors); | ||
| 65 | } | ||
| 66 | |||
| 67 | void MarkDescriptorAsRead(u32 index) noexcept { | ||
| 68 | read_descriptors[index / 64] |= 1ULL << (index % 64); | ||
| 69 | } | ||
| 70 | |||
| 71 | [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept { | ||
| 72 | return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0; | ||
| 73 | } | ||
| 74 | |||
| 75 | Tegra::MemoryManager& gpu_memory; | ||
| 76 | GPUVAddr current_gpu_addr{}; | ||
| 77 | u32 current_limit{}; | ||
| 78 | std::vector<u64> read_descriptors; | ||
| 79 | std::vector<Descriptor> descriptors; | ||
| 80 | }; | ||
| 81 | |||
| 82 | } // namespace VideoCommon | ||
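Typical use, sketched below: synchronize the table against the currently bound descriptor pool, then `Read` individual slots; the returned flag says whether that slot is new or changed since the last read, which lets a caller skip re-validating untouched descriptors. `TICEntry` is a hypothetical descriptor type, and the call keeps the `Synchornize` spelling used in the header.

```cpp
#include "video_core/texture_cache/descriptor_table.h"

// Hypothetical descriptor: Read() compares cached entries, so the type
// needs operator== (and the derived !=).
struct TICEntry {
    u64 raw[4];
    bool operator==(const TICEntry&) const = default;
};

void ValidateHandle(VideoCommon::DescriptorTable<TICEntry>& table, GPUVAddr pool_addr,
                    u32 pool_limit, u32 handle) {
    if (table.Synchornize(pool_addr, pool_limit)) {
        // Pool address or limit changed: cached read state was rebuilt.
    }
    const auto [descriptor, changed] = table.Read(handle);
    if (changed) {
        // First read of this slot, or its GPU-side contents differ from
        // the cached copy: re-create whatever was built from `descriptor`.
    }
}
```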
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 7938d71eb..ddfb726fe 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp | |||
| @@ -2,7 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | ||
| 6 | #include "common/common_types.h" | 5 | #include "common/common_types.h" |
| 7 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 8 | #include "video_core/texture_cache/format_lookup_table.h" | 7 | #include "video_core/texture_cache/format_lookup_table.h" |
| @@ -20,198 +19,207 @@ constexpr auto UNORM = ComponentType::UNORM; | |||
| 20 | constexpr auto SINT = ComponentType::SINT; | 19 | constexpr auto SINT = ComponentType::SINT; |
| 21 | constexpr auto UINT = ComponentType::UINT; | 20 | constexpr auto UINT = ComponentType::UINT; |
| 22 | constexpr auto FLOAT = ComponentType::FLOAT; | 21 | constexpr auto FLOAT = ComponentType::FLOAT; |
| 23 | constexpr bool C = false; // Normal color | 22 | constexpr bool LINEAR = false; |
| 24 | constexpr bool S = true; // Srgb | 23 | constexpr bool SRGB = true; |
| 25 | 24 | ||
| 26 | struct Table { | 25 | constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component, |
| 27 | constexpr Table(TextureFormat texture_format_, bool is_srgb_, ComponentType red_component_, | 26 | ComponentType blue_component, ComponentType alpha_component, bool is_srgb) { |
| 28 | ComponentType green_component_, ComponentType blue_component_, | 27 | u32 hash = is_srgb ? 1 : 0; |
| 29 | ComponentType alpha_component_, PixelFormat pixel_format_) | 28 | hash |= static_cast<u32>(red_component) << 1; |
| 30 | : texture_format{texture_format_}, pixel_format{pixel_format_}, | 29 | hash |= static_cast<u32>(green_component) << 4; |
| 31 | red_component{red_component_}, green_component{green_component_}, | 30 | hash |= static_cast<u32>(blue_component) << 7; |
| 32 | blue_component{blue_component_}, alpha_component{alpha_component_}, is_srgb{is_srgb_} {} | 31 | hash |= static_cast<u32>(alpha_component) << 10; |
| 33 | 32 | hash |= static_cast<u32>(format) << 13; | |
| 34 | TextureFormat texture_format; | 33 | return hash; |
| 35 | PixelFormat pixel_format; | 34 | } |
| 36 | ComponentType red_component; | ||
| 37 | ComponentType green_component; | ||
| 38 | ComponentType blue_component; | ||
| 39 | ComponentType alpha_component; | ||
| 40 | bool is_srgb; | ||
| 41 | }; | ||
| 42 | constexpr std::array<Table, 86> DefinitionTable = {{ | ||
| 43 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM}, | ||
| 44 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM}, | ||
| 45 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT}, | ||
| 46 | {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT}, | ||
| 47 | {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB}, | ||
| 48 | |||
| 49 | {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM}, | ||
| 50 | |||
| 51 | {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM}, | ||
| 52 | {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT}, | ||
| 53 | |||
| 54 | {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM}, | ||
| 55 | |||
| 56 | {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM}, | ||
| 57 | |||
| 58 | {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM}, | ||
| 59 | {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM}, | ||
| 60 | {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT}, | ||
| 61 | {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT}, | ||
| 62 | |||
| 63 | {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM}, | ||
| 64 | {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM}, | ||
| 65 | {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT}, | ||
| 66 | {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT}, | ||
| 67 | |||
| 68 | {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM}, | ||
| 69 | {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM}, | ||
| 70 | {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT}, | ||
| 71 | {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT}, | ||
| 72 | {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT}, | ||
| 73 | |||
| 74 | {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT}, | ||
| 75 | {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM}, | ||
| 76 | {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM}, | ||
| 77 | {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT}, | ||
| 78 | {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT}, | ||
| 79 | |||
| 80 | {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT}, | ||
| 81 | {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM}, | ||
| 82 | {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM}, | ||
| 83 | {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT}, | ||
| 84 | {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT}, | ||
| 85 | |||
| 86 | {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT}, | ||
| 87 | |||
| 88 | {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT}, | ||
| 89 | {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT}, | ||
| 90 | {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT}, | ||
| 91 | |||
| 92 | {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT}, | ||
| 93 | |||
| 94 | {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT}, | ||
| 95 | {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT}, | ||
| 96 | {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT}, | ||
| 97 | |||
| 98 | {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT}, | ||
| 99 | {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT}, | ||
| 100 | {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT}, | ||
| 101 | |||
| 102 | {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT}, | ||
| 103 | |||
| 104 | {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT}, | ||
| 105 | {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM}, | ||
| 106 | {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, | ||
| 107 | {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, | ||
| 108 | {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT}, | ||
| 109 | |||
| 110 | {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM}, | ||
| 111 | {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB}, | ||
| 112 | |||
| 113 | {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM}, | ||
| 114 | {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB}, | ||
| 115 | |||
| 116 | {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM}, | ||
| 117 | {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB}, | ||
| 118 | |||
| 119 | {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM}, | ||
| 120 | {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM}, | ||
| 121 | |||
| 122 | {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM}, | ||
| 123 | {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM}, | ||
| 124 | |||
| 125 | {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM}, | ||
| 126 | {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB}, | ||
| 127 | |||
| 128 | {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT}, | ||
| 129 | {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT}, | ||
| 130 | |||
| 131 | {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM}, | ||
| 132 | {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB}, | ||
| 133 | |||
| 134 | {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM}, | ||
| 135 | {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB}, | ||
| 136 | |||
| 137 | {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM}, | ||
| 138 | {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB}, | ||
| 139 | |||
| 140 | {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM}, | ||
| 141 | {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB}, | ||
| 142 | |||
| 143 | {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM}, | ||
| 144 | {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB}, | ||
| 145 | |||
| 146 | {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM}, | ||
| 147 | {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB}, | ||
| 148 | |||
| 149 | {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM}, | ||
| 150 | {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB}, | ||
| 151 | |||
| 152 | {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM}, | ||
| 153 | {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB}, | ||
| 154 | |||
| 155 | {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM}, | ||
| 156 | {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB}, | ||
| 157 | |||
| 158 | {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM}, | ||
| 159 | {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB}, | ||
| 160 | 35 | ||
| 161 | {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM}, | 36 | constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb = LINEAR) { |
| 162 | {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB}, | 37 | return Hash(format, component, component, component, component, is_srgb); |
| 163 | }}; | 38 | } |
| 164 | 39 | ||
| 165 | } // Anonymous namespace | 40 | } // Anonymous namespace |
| 166 | 41 | ||
| 167 | FormatLookupTable::FormatLookupTable() { | 42 | PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green, |
| 168 | table.fill(static_cast<u8>(PixelFormat::Invalid)); | 43 | ComponentType blue, ComponentType alpha, |
| 169 | 44 | bool is_srgb) noexcept { | |
| 170 | for (const auto& entry : DefinitionTable) { | 45 | switch (Hash(format, red, green, blue, alpha, is_srgb)) { |
| 171 | table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component, | 46 | case Hash(TextureFormat::A8R8G8B8, UNORM): |
| 172 | entry.green_component, entry.blue_component, entry.alpha_component)] = | 47 | return PixelFormat::A8B8G8R8_UNORM; |
| 173 | static_cast<u8>(entry.pixel_format); | 48 | case Hash(TextureFormat::A8R8G8B8, SNORM): |
| 174 | } | 49 | return PixelFormat::A8B8G8R8_SNORM; |
| 175 | } | 50 | case Hash(TextureFormat::A8R8G8B8, UINT): |
| 176 | 51 | return PixelFormat::A8B8G8R8_UINT; | |
| 177 | PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb, | 52 | case Hash(TextureFormat::A8R8G8B8, SINT): |
| 178 | ComponentType red_component, | 53 | return PixelFormat::A8B8G8R8_SINT; |
| 179 | ComponentType green_component, | 54 | case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB): |
| 180 | ComponentType blue_component, | 55 | return PixelFormat::A8B8G8R8_SRGB; |
| 181 | ComponentType alpha_component) const noexcept { | 56 | case Hash(TextureFormat::B5G6R5, UNORM): |
| 182 | const auto pixel_format = static_cast<PixelFormat>(table[CalculateIndex( | 57 | return PixelFormat::B5G6R5_UNORM; |
| 183 | format, is_srgb, red_component, green_component, blue_component, alpha_component)]); | 58 | case Hash(TextureFormat::A2B10G10R10, UNORM): |
| 184 | // [[likely]] | 59 | return PixelFormat::A2B10G10R10_UNORM; |
| 185 | if (pixel_format != PixelFormat::Invalid) { | 60 | case Hash(TextureFormat::A2B10G10R10, UINT): |
| 186 | return pixel_format; | 61 | return PixelFormat::A2B10G10R10_UINT; |
| 62 | case Hash(TextureFormat::A1B5G5R5, UNORM): | ||
| 63 | return PixelFormat::A1B5G5R5_UNORM; | ||
| 64 | case Hash(TextureFormat::A4B4G4R4, UNORM): | ||
| 65 | return PixelFormat::A4B4G4R4_UNORM; | ||
| 66 | case Hash(TextureFormat::R8, UNORM): | ||
| 67 | return PixelFormat::R8_UNORM; | ||
| 68 | case Hash(TextureFormat::R8, SNORM): | ||
| 69 | return PixelFormat::R8_SNORM; | ||
| 70 | case Hash(TextureFormat::R8, UINT): | ||
| 71 | return PixelFormat::R8_UINT; | ||
| 72 | case Hash(TextureFormat::R8, SINT): | ||
| 73 | return PixelFormat::R8_SINT; | ||
| 74 | case Hash(TextureFormat::R8G8, UNORM): | ||
| 75 | return PixelFormat::R8G8_UNORM; | ||
| 76 | case Hash(TextureFormat::R8G8, SNORM): | ||
| 77 | return PixelFormat::R8G8_SNORM; | ||
| 78 | case Hash(TextureFormat::R8G8, UINT): | ||
| 79 | return PixelFormat::R8G8_UINT; | ||
| 80 | case Hash(TextureFormat::R8G8, SINT): | ||
| 81 | return PixelFormat::R8G8_SINT; | ||
| 82 | case Hash(TextureFormat::R16G16B16A16, FLOAT): | ||
| 83 | return PixelFormat::R16G16B16A16_FLOAT; | ||
| 84 | case Hash(TextureFormat::R16G16B16A16, UNORM): | ||
| 85 | return PixelFormat::R16G16B16A16_UNORM; | ||
| 86 | case Hash(TextureFormat::R16G16B16A16, SNORM): | ||
| 87 | return PixelFormat::R16G16B16A16_SNORM; | ||
| 88 | case Hash(TextureFormat::R16G16B16A16, UINT): | ||
| 89 | return PixelFormat::R16G16B16A16_UINT; | ||
| 90 | case Hash(TextureFormat::R16G16B16A16, SINT): | ||
| 91 | return PixelFormat::R16G16B16A16_SINT; | ||
| 92 | case Hash(TextureFormat::R16G16, FLOAT): | ||
| 93 | return PixelFormat::R16G16_FLOAT; | ||
| 94 | case Hash(TextureFormat::R16G16, UNORM): | ||
| 95 | return PixelFormat::R16G16_UNORM; | ||
| 96 | case Hash(TextureFormat::R16G16, SNORM): | ||
| 97 | return PixelFormat::R16G16_SNORM; | ||
| 98 | case Hash(TextureFormat::R16G16, UINT): | ||
| 99 | return PixelFormat::R16G16_UINT; | ||
| 100 | case Hash(TextureFormat::R16G16, SINT): | ||
| 101 | return PixelFormat::R16G16_SINT; | ||
| 102 | case Hash(TextureFormat::R16, FLOAT): | ||
| 103 | return PixelFormat::R16_FLOAT; | ||
| 104 | case Hash(TextureFormat::R16, UNORM): | ||
| 105 | return PixelFormat::R16_UNORM; | ||
| 106 | case Hash(TextureFormat::R16, SNORM): | ||
| 107 | return PixelFormat::R16_SNORM; | ||
| 108 | case Hash(TextureFormat::R16, UINT): | ||
| 109 | return PixelFormat::R16_UINT; | ||
| 110 | case Hash(TextureFormat::R16, SINT): | ||
| 111 | return PixelFormat::R16_SINT; | ||
| 112 | case Hash(TextureFormat::B10G11R11, FLOAT): | ||
| 113 | return PixelFormat::B10G11R11_FLOAT; | ||
| 114 | case Hash(TextureFormat::R32G32B32A32, FLOAT): | ||
| 115 | return PixelFormat::R32G32B32A32_FLOAT; | ||
| 116 | case Hash(TextureFormat::R32G32B32A32, UINT): | ||
| 117 | return PixelFormat::R32G32B32A32_UINT; | ||
| 118 | case Hash(TextureFormat::R32G32B32A32, SINT): | ||
| 119 | return PixelFormat::R32G32B32A32_SINT; | ||
| 120 | case Hash(TextureFormat::R32G32B32, FLOAT): | ||
| 121 | return PixelFormat::R32G32B32_FLOAT; | ||
| 122 | case Hash(TextureFormat::R32G32, FLOAT): | ||
| 123 | return PixelFormat::R32G32_FLOAT; | ||
| 124 | case Hash(TextureFormat::R32G32, UINT): | ||
| 125 | return PixelFormat::R32G32_UINT; | ||
| 126 | case Hash(TextureFormat::R32G32, SINT): | ||
| 127 | return PixelFormat::R32G32_SINT; | ||
| 128 | case Hash(TextureFormat::R32, FLOAT): | ||
| 129 | return PixelFormat::R32_FLOAT; | ||
| 130 | case Hash(TextureFormat::R32, UINT): | ||
| 131 | return PixelFormat::R32_UINT; | ||
| 132 | case Hash(TextureFormat::R32, SINT): | ||
| 133 | return PixelFormat::R32_SINT; | ||
| 134 | case Hash(TextureFormat::E5B9G9R9, FLOAT): | ||
| 135 | return PixelFormat::E5B9G9R9_FLOAT; | ||
| 136 | case Hash(TextureFormat::D32, FLOAT): | ||
| 137 | return PixelFormat::D32_FLOAT; | ||
| 138 | case Hash(TextureFormat::D16, UNORM): | ||
| 139 | return PixelFormat::D16_UNORM; | ||
| 140 | case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR): | ||
| 141 | return PixelFormat::S8_UINT_D24_UNORM; | ||
| 142 | case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR): | ||
| 143 | return PixelFormat::S8_UINT_D24_UNORM; | ||
| 144 | case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): | ||
| 145 | return PixelFormat::D32_FLOAT_S8_UINT; | ||
| 146 | case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR): | ||
| 147 | return PixelFormat::BC1_RGBA_UNORM; | ||
| 148 | case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB): | ||
| 149 | return PixelFormat::BC1_RGBA_SRGB; | ||
| 150 | case Hash(TextureFormat::BC2, UNORM, LINEAR): | ||
| 151 | return PixelFormat::BC2_UNORM; | ||
| 152 | case Hash(TextureFormat::BC2, UNORM, SRGB): | ||
| 153 | return PixelFormat::BC2_SRGB; | ||
| 154 | case Hash(TextureFormat::BC3, UNORM, LINEAR): | ||
| 155 | return PixelFormat::BC3_UNORM; | ||
| 156 | case Hash(TextureFormat::BC3, UNORM, SRGB): | ||
| 157 | return PixelFormat::BC3_SRGB; | ||
| 158 | case Hash(TextureFormat::BC4, UNORM): | ||
| 159 | return PixelFormat::BC4_UNORM; | ||
| 160 | case Hash(TextureFormat::BC4, SNORM): | ||
| 161 | return PixelFormat::BC4_SNORM; | ||
| 162 | case Hash(TextureFormat::BC5, UNORM): | ||
| 163 | return PixelFormat::BC5_UNORM; | ||
| 164 | case Hash(TextureFormat::BC5, SNORM): | ||
| 165 | return PixelFormat::BC5_SNORM; | ||
| 166 | case Hash(TextureFormat::BC7, UNORM, LINEAR): | ||
| 167 | return PixelFormat::BC7_UNORM; | ||
| 168 | case Hash(TextureFormat::BC7, UNORM, SRGB): | ||
| 169 | return PixelFormat::BC7_SRGB; | ||
| 170 | case Hash(TextureFormat::BC6H_SFLOAT, FLOAT): | ||
| 171 | return PixelFormat::BC6H_SFLOAT; | ||
| 172 | case Hash(TextureFormat::BC6H_UFLOAT, FLOAT): | ||
| 173 | return PixelFormat::BC6H_UFLOAT; | ||
| 174 | case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR): | ||
| 175 | return PixelFormat::ASTC_2D_4X4_UNORM; | ||
| 176 | case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB): | ||
| 177 | return PixelFormat::ASTC_2D_4X4_SRGB; | ||
| 178 | case Hash(TextureFormat::ASTC_2D_5X4, UNORM, LINEAR): | ||
| 179 | return PixelFormat::ASTC_2D_5X4_UNORM; | ||
| 180 | case Hash(TextureFormat::ASTC_2D_5X4, UNORM, SRGB): | ||
| 181 | return PixelFormat::ASTC_2D_5X4_SRGB; | ||
| 182 | case Hash(TextureFormat::ASTC_2D_5X5, UNORM, LINEAR): | ||
| 183 | return PixelFormat::ASTC_2D_5X5_UNORM; | ||
| 184 | case Hash(TextureFormat::ASTC_2D_5X5, UNORM, SRGB): | ||
| 185 | return PixelFormat::ASTC_2D_5X5_SRGB; | ||
| 186 | case Hash(TextureFormat::ASTC_2D_8X8, UNORM, LINEAR): | ||
| 187 | return PixelFormat::ASTC_2D_8X8_UNORM; | ||
| 188 | case Hash(TextureFormat::ASTC_2D_8X8, UNORM, SRGB): | ||
| 189 | return PixelFormat::ASTC_2D_8X8_SRGB; | ||
| 190 | case Hash(TextureFormat::ASTC_2D_8X5, UNORM, LINEAR): | ||
| 191 | return PixelFormat::ASTC_2D_8X5_UNORM; | ||
| 192 | case Hash(TextureFormat::ASTC_2D_8X5, UNORM, SRGB): | ||
| 193 | return PixelFormat::ASTC_2D_8X5_SRGB; | ||
| 194 | case Hash(TextureFormat::ASTC_2D_10X8, UNORM, LINEAR): | ||
| 195 | return PixelFormat::ASTC_2D_10X8_UNORM; | ||
| 196 | case Hash(TextureFormat::ASTC_2D_10X8, UNORM, SRGB): | ||
| 197 | return PixelFormat::ASTC_2D_10X8_SRGB; | ||
| 198 | case Hash(TextureFormat::ASTC_2D_6X6, UNORM, LINEAR): | ||
| 199 | return PixelFormat::ASTC_2D_6X6_UNORM; | ||
| 200 | case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB): | ||
| 201 | return PixelFormat::ASTC_2D_6X6_SRGB; | ||
| 202 | case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR): | ||
| 203 | return PixelFormat::ASTC_2D_10X10_UNORM; | ||
| 204 | case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB): | ||
| 205 | return PixelFormat::ASTC_2D_10X10_SRGB; | ||
| 206 | case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR): | ||
| 207 | return PixelFormat::ASTC_2D_12X12_UNORM; | ||
| 208 | case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB): | ||
| 209 | return PixelFormat::ASTC_2D_12X12_SRGB; | ||
| 210 | case Hash(TextureFormat::ASTC_2D_8X6, UNORM, LINEAR): | ||
| 211 | return PixelFormat::ASTC_2D_8X6_UNORM; | ||
| 212 | case Hash(TextureFormat::ASTC_2D_8X6, UNORM, SRGB): | ||
| 213 | return PixelFormat::ASTC_2D_8X6_SRGB; | ||
| 214 | case Hash(TextureFormat::ASTC_2D_6X5, UNORM, LINEAR): | ||
| 215 | return PixelFormat::ASTC_2D_6X5_UNORM; | ||
| 216 | case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB): | ||
| 217 | return PixelFormat::ASTC_2D_6X5_SRGB; | ||
| 187 | } | 218 | } |
| 188 | UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}", | 219 | UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}", |
| 189 | static_cast<int>(format), is_srgb, static_cast<int>(red_component), | 220 | static_cast<int>(format), is_srgb, static_cast<int>(red), |
| 190 | static_cast<int>(green_component), static_cast<int>(blue_component), | 221 | static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha)); |
| 191 | static_cast<int>(alpha_component)); | ||
| 192 | return PixelFormat::A8B8G8R8_UNORM; | 222 | return PixelFormat::A8B8G8R8_UNORM; |
| 193 | } | 223 | } |
| 194 | 224 | ||
| 195 | void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component, | ||
| 196 | ComponentType green_component, ComponentType blue_component, | ||
| 197 | ComponentType alpha_component, PixelFormat pixel_format) {} | ||
| 198 | |||
| 199 | std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb, | ||
| 200 | ComponentType red_component, | ||
| 201 | ComponentType green_component, | ||
| 202 | ComponentType blue_component, | ||
| 203 | ComponentType alpha_component) noexcept { | ||
| 204 | const auto format_index = static_cast<std::size_t>(format); | ||
| 205 | const auto red_index = static_cast<std::size_t>(red_component); | ||
| 206 | const auto green_index = static_cast<std::size_t>(green_component); | ||
| 207 | const auto blue_index = static_cast<std::size_t>(blue_component); | ||
| 208 | const auto alpha_index = static_cast<std::size_t>(alpha_component); | ||
| 209 | const std::size_t srgb_index = is_srgb ? 1 : 0; | ||
| 210 | |||
| 211 | return format_index * PerFormat + | ||
| 212 | srgb_index * PerComponent * PerComponent * PerComponent * PerComponent + | ||
| 213 | alpha_index * PerComponent * PerComponent * PerComponent + | ||
| 214 | blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index; | ||
| 215 | } | ||
| 216 | |||
| 217 | } // namespace VideoCommon | 225 | } // namespace VideoCommon |
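Editor's note: the switch above keys on a six-argument constexpr Hash; only the single-component convenience overload survives in this hunk, so the following is a sketch of the bit-packing idea rather than the commit's exact implementation (shift widths and enum values here are assumptions):

    #include <cstdint>

    enum class TextureFormat : std::uint32_t { R8 = 0x1d };  // value is hypothetical
    enum class ComponentType : std::uint32_t { SNORM = 1, UNORM = 2, SINT = 3, UINT = 4, FLOAT = 7 };

    // Pack the sRGB bit, four 3-bit component types, and the format id into one
    // compile-time constant; distinct tuples yield distinct switch case labels.
    constexpr std::uint32_t Hash(TextureFormat format, ComponentType r, ComponentType g,
                                 ComponentType b, ComponentType a, bool is_srgb) {
        std::uint32_t hash = is_srgb ? 1u : 0u;
        hash |= static_cast<std::uint32_t>(r) << 1;
        hash |= static_cast<std::uint32_t>(g) << 4;
        hash |= static_cast<std::uint32_t>(b) << 7;
        hash |= static_cast<std::uint32_t>(a) << 10;
        hash |= static_cast<std::uint32_t>(format) << 13;
        return hash;
    }

    static_assert(Hash(TextureFormat::R8, ComponentType::UNORM, ComponentType::UNORM,
                       ComponentType::UNORM, ComponentType::UNORM, false) !=
                  Hash(TextureFormat::R8, ComponentType::SNORM, ComponentType::SNORM,
                       ComponentType::SNORM, ComponentType::SNORM, false));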
diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h index aa77e0a5a..729533999 100644 --- a/src/video_core/texture_cache/format_lookup_table.h +++ b/src/video_core/texture_cache/format_lookup_table.h | |||
| @@ -4,48 +4,14 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <limits> | ||
| 9 | #include "video_core/surface.h" | 7 | #include "video_core/surface.h" |
| 10 | #include "video_core/textures/texture.h" | 8 | #include "video_core/textures/texture.h" |
| 11 | 9 | ||
| 12 | namespace VideoCommon { | 10 | namespace VideoCommon { |
| 13 | 11 | ||
| 14 | class FormatLookupTable { | 12 | VideoCore::Surface::PixelFormat PixelFormatFromTextureInfo( |
| 15 | public: | 13 | Tegra::Texture::TextureFormat format, Tegra::Texture::ComponentType red_component, |
| 16 | explicit FormatLookupTable(); | 14 | Tegra::Texture::ComponentType green_component, Tegra::Texture::ComponentType blue_component, |
| 17 | 15 | Tegra::Texture::ComponentType alpha_component, bool is_srgb) noexcept; | |
| 18 | VideoCore::Surface::PixelFormat GetPixelFormat( | ||
| 19 | Tegra::Texture::TextureFormat format, bool is_srgb, | ||
| 20 | Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component, | ||
| 21 | Tegra::Texture::ComponentType blue_component, | ||
| 22 | Tegra::Texture::ComponentType alpha_component) const noexcept; | ||
| 23 | |||
| 24 | private: | ||
| 25 | static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max()); | ||
| 26 | |||
| 27 | static constexpr std::size_t NumTextureFormats = 128; | ||
| 28 | |||
| 29 | static constexpr std::size_t PerComponent = 8; | ||
| 30 | static constexpr std::size_t PerComponents2 = PerComponent * PerComponent; | ||
| 31 | static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent; | ||
| 32 | static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent; | ||
| 33 | static constexpr std::size_t PerFormat = PerComponents4 * 2; | ||
| 34 | |||
| 35 | static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb, | ||
| 36 | Tegra::Texture::ComponentType red_component, | ||
| 37 | Tegra::Texture::ComponentType green_component, | ||
| 38 | Tegra::Texture::ComponentType blue_component, | ||
| 39 | Tegra::Texture::ComponentType alpha_component) noexcept; | ||
| 40 | |||
| 41 | void Set(Tegra::Texture::TextureFormat format, bool is_srgb, | ||
| 42 | Tegra::Texture::ComponentType red_component, | ||
| 43 | Tegra::Texture::ComponentType green_component, | ||
| 44 | Tegra::Texture::ComponentType blue_component, | ||
| 45 | Tegra::Texture::ComponentType alpha_component, | ||
| 46 | VideoCore::Surface::PixelFormat pixel_format); | ||
| 47 | |||
| 48 | std::array<u8, NumTextureFormats * PerFormat> table; | ||
| 49 | }; | ||
| 50 | 16 | ||
| 51 | } // namespace VideoCommon | 17 | } // namespace VideoCommon |
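Editor's note: with the class gone, call sites invoke the header's free function directly. A hypothetical call site, assuming the format/component enums come from video_core/textures/texture.h as the parameter types state:

    using Tegra::Texture::TextureFormat;
    using Tegra::Texture::ComponentType;

    // Replaces the old pattern of constructing a FormatLookupTable and calling
    // GetPixelFormat(); per the switch in the .cpp this resolves to A8B8G8R8_UNORM.
    const VideoCore::Surface::PixelFormat pixel_format = VideoCommon::PixelFormatFromTextureInfo(
        TextureFormat::A8R8G8B8, ComponentType::UNORM, ComponentType::UNORM,
        ComponentType::UNORM, ComponentType::UNORM, /*is_srgb=*/false);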
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp new file mode 100644 index 000000000..d10ba4ccd --- /dev/null +++ b/src/video_core/texture_cache/formatter.cpp | |||
| @@ -0,0 +1,95 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <string> | ||
| 7 | |||
| 8 | #include "video_core/texture_cache/formatter.h" | ||
| 9 | #include "video_core/texture_cache/image_base.h" | ||
| 10 | #include "video_core/texture_cache/image_info.h" | ||
| 11 | #include "video_core/texture_cache/image_view_base.h" | ||
| 12 | #include "video_core/texture_cache/render_targets.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | std::string Name(const ImageBase& image) { | ||
| 17 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 18 | const ImageInfo& info = image.info; | ||
| 19 | const u32 width = info.size.width; | ||
| 20 | const u32 height = info.size.height; | ||
| 21 | const u32 depth = info.size.depth; | ||
| 22 | const u32 num_layers = image.info.resources.layers; | ||
| 23 | const u32 num_levels = image.info.resources.levels; | ||
| 24 | std::string resource; | ||
| 25 | if (num_layers > 1) { | ||
| 26 | resource += fmt::format(":L{}", num_layers); | ||
| 27 | } | ||
| 28 | if (num_levels > 1) { | ||
| 29 | resource += fmt::format(":M{}", num_levels); | ||
| 30 | } | ||
| 31 | switch (image.info.type) { | ||
| 32 | case ImageType::e1D: | ||
| 33 | return fmt::format("Image 1D 0x{:x} {}{}", gpu_addr, width, resource); | ||
| 34 | case ImageType::e2D: | ||
| 35 | return fmt::format("Image 2D 0x{:x} {}x{}{}", gpu_addr, width, height, resource); | ||
| 36 | case ImageType::e3D: | ||
| 37 | return fmt::format("Image 3D 0x{:x} {}x{}x{}{}", gpu_addr, width, height, depth, resource); | ||
| 38 | case ImageType::Linear: | ||
| 39 | return fmt::format("Image Linear 0x{:x} {}x{}", gpu_addr, width, height); | ||
| 40 | case ImageType::Buffer: | ||
| 41 | return fmt::format("Buffer 0x{:x} {}", image.gpu_addr, image.info.size.width); | ||
| 42 | } | ||
| 43 | return "Invalid"; | ||
| 44 | } | ||
| 45 | |||
| 46 | std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) { | ||
| 47 | const u32 width = image_view.size.width; | ||
| 48 | const u32 height = image_view.size.height; | ||
| 49 | const u32 depth = image_view.size.depth; | ||
| 50 | const u32 num_levels = image_view.range.extent.levels; | ||
| 51 | const u32 num_layers = image_view.range.extent.layers; | ||
| 52 | |||
| 53 | const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; | ||
| 54 | switch (type.value_or(image_view.type)) { | ||
| 55 | case ImageViewType::e1D: | ||
| 56 | return fmt::format("ImageView 1D {}{}", width, level); | ||
| 57 | case ImageViewType::e2D: | ||
| 58 | return fmt::format("ImageView 2D {}x{}{}", width, height, level); | ||
| 59 | case ImageViewType::Cube: | ||
| 60 | return fmt::format("ImageView Cube {}x{}{}", width, height, level); | ||
| 61 | case ImageViewType::e3D: | ||
| 62 | return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level); | ||
| 63 | case ImageViewType::e1DArray: | ||
| 64 | return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers); | ||
| 65 | case ImageViewType::e2DArray: | ||
| 66 | return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers); | ||
| 67 | case ImageViewType::CubeArray: | ||
| 68 | return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers); | ||
| 69 | case ImageViewType::Rect: | ||
| 70 | return fmt::format("ImageView Rect {}x{}{}", width, height, level); | ||
| 71 | case ImageViewType::Buffer: | ||
| 72 | return fmt::format("BufferView {}", width); | ||
| 73 | } | ||
| 74 | return "Invalid"; | ||
| 75 | } | ||
| 76 | |||
| 77 | std::string Name(const RenderTargets& render_targets) { | ||
| 78 | std::string_view debug_prefix; | ||
| 79 | const auto num_color = std::ranges::count_if( | ||
| 80 | render_targets.color_buffer_ids, [](ImageViewId id) { return static_cast<bool>(id); }); | ||
| 81 | if (render_targets.depth_buffer_id) { | ||
| 82 | debug_prefix = num_color > 0 ? "R" : "Z"; | ||
| 83 | } else { | ||
| 84 | debug_prefix = num_color > 0 ? "C" : "X"; | ||
| 85 | } | ||
| 86 | const Extent2D size = render_targets.size; | ||
| 87 | if (num_color > 0) { | ||
| 88 | return fmt::format("Framebuffer {}{} {}x{}", debug_prefix, num_color, size.width, | ||
| 89 | size.height); | ||
| 90 | } else { | ||
| 91 | return fmt::format("Framebuffer {} {}x{}", debug_prefix, size.width, size.height); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | } // namespace VideoCommon | ||
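Editor's note: taking the fmt strings above at face value, the Name() helpers produce debug labels such as the following (addresses and sizes are made-up values):

    // Image 2D, gpu_addr 0x1000, 256x256, 1 layer, 10 mip levels
    //   -> "Image 2D 0x1000 256x256:M10"
    // ImageView 2DArray, 64x64, 1 level, 6 layers
    //   -> "ImageView 2DArray 64x64|6"
    // RenderTargets with one color buffer, no depth buffer, 1280x720
    //   -> "Framebuffer C1 1280x720"

The single-letter framebuffer prefix encodes the attachment mix directly from the branches above: "R" for color plus depth, "Z" for depth only, "C" for color only, "X" for neither.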
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h new file mode 100644 index 000000000..a48413983 --- /dev/null +++ b/src/video_core/texture_cache/formatter.h | |||
| @@ -0,0 +1,263 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/texture_cache/types.h" | ||
| 13 | |||
| 14 | template <> | ||
| 15 | struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::string_view> { | ||
| 16 | template <typename FormatContext> | ||
| 17 | auto format(VideoCore::Surface::PixelFormat format, FormatContext& ctx) { | ||
| 18 | using VideoCore::Surface::PixelFormat; | ||
| 19 | const string_view name = [format] { | ||
| 20 | switch (format) { | ||
| 21 | case PixelFormat::A8B8G8R8_UNORM: | ||
| 22 | return "A8B8G8R8_UNORM"; | ||
| 23 | case PixelFormat::A8B8G8R8_SNORM: | ||
| 24 | return "A8B8G8R8_SNORM"; | ||
| 25 | case PixelFormat::A8B8G8R8_SINT: | ||
| 26 | return "A8B8G8R8_SINT"; | ||
| 27 | case PixelFormat::A8B8G8R8_UINT: | ||
| 28 | return "A8B8G8R8_UINT"; | ||
| 29 | case PixelFormat::R5G6B5_UNORM: | ||
| 30 | return "R5G6B5_UNORM"; | ||
| 31 | case PixelFormat::B5G6R5_UNORM: | ||
| 32 | return "B5G6R5_UNORM"; | ||
| 33 | case PixelFormat::A1R5G5B5_UNORM: | ||
| 34 | return "A1R5G5B5_UNORM"; | ||
| 35 | case PixelFormat::A2B10G10R10_UNORM: | ||
| 36 | return "A2B10G10R10_UNORM"; | ||
| 37 | case PixelFormat::A2B10G10R10_UINT: | ||
| 38 | return "A2B10G10R10_UINT"; | ||
| 39 | case PixelFormat::A1B5G5R5_UNORM: | ||
| 40 | return "A1B5G5R5_UNORM"; | ||
| 41 | case PixelFormat::R8_UNORM: | ||
| 42 | return "R8_UNORM"; | ||
| 43 | case PixelFormat::R8_SNORM: | ||
| 44 | return "R8_SNORM"; | ||
| 45 | case PixelFormat::R8_SINT: | ||
| 46 | return "R8_SINT"; | ||
| 47 | case PixelFormat::R8_UINT: | ||
| 48 | return "R8_UINT"; | ||
| 49 | case PixelFormat::R16G16B16A16_FLOAT: | ||
| 50 | return "R16G16B16A16_FLOAT"; | ||
| 51 | case PixelFormat::R16G16B16A16_UNORM: | ||
| 52 | return "R16G16B16A16_UNORM"; | ||
| 53 | case PixelFormat::R16G16B16A16_SNORM: | ||
| 54 | return "R16G16B16A16_SNORM"; | ||
| 55 | case PixelFormat::R16G16B16A16_SINT: | ||
| 56 | return "R16G16B16A16_SINT"; | ||
| 57 | case PixelFormat::R16G16B16A16_UINT: | ||
| 58 | return "R16G16B16A16_UINT"; | ||
| 59 | case PixelFormat::B10G11R11_FLOAT: | ||
| 60 | return "B10G11R11_FLOAT"; | ||
| 61 | case PixelFormat::R32G32B32A32_UINT: | ||
| 62 | return "R32G32B32A32_UINT"; | ||
| 63 | case PixelFormat::BC1_RGBA_UNORM: | ||
| 64 | return "BC1_RGBA_UNORM"; | ||
| 65 | case PixelFormat::BC2_UNORM: | ||
| 66 | return "BC2_UNORM"; | ||
| 67 | case PixelFormat::BC3_UNORM: | ||
| 68 | return "BC3_UNORM"; | ||
| 69 | case PixelFormat::BC4_UNORM: | ||
| 70 | return "BC4_UNORM"; | ||
| 71 | case PixelFormat::BC4_SNORM: | ||
| 72 | return "BC4_SNORM"; | ||
| 73 | case PixelFormat::BC5_UNORM: | ||
| 74 | return "BC5_UNORM"; | ||
| 75 | case PixelFormat::BC5_SNORM: | ||
| 76 | return "BC5_SNORM"; | ||
| 77 | case PixelFormat::BC7_UNORM: | ||
| 78 | return "BC7_UNORM"; | ||
| 79 | case PixelFormat::BC6H_UFLOAT: | ||
| 80 | return "BC6H_UFLOAT"; | ||
| 81 | case PixelFormat::BC6H_SFLOAT: | ||
| 82 | return "BC6H_SFLOAT"; | ||
| 83 | case PixelFormat::ASTC_2D_4X4_UNORM: | ||
| 84 | return "ASTC_2D_4X4_UNORM"; | ||
| 85 | case PixelFormat::B8G8R8A8_UNORM: | ||
| 86 | return "B8G8R8A8_UNORM"; | ||
| 87 | case PixelFormat::R32G32B32A32_FLOAT: | ||
| 88 | return "R32G32B32A32_FLOAT"; | ||
| 89 | case PixelFormat::R32G32B32A32_SINT: | ||
| 90 | return "R32G32B32A32_SINT"; | ||
| 91 | case PixelFormat::R32G32_FLOAT: | ||
| 92 | return "R32G32_FLOAT"; | ||
| 93 | case PixelFormat::R32G32_SINT: | ||
| 94 | return "R32G32_SINT"; | ||
| 95 | case PixelFormat::R32_FLOAT: | ||
| 96 | return "R32_FLOAT"; | ||
| 97 | case PixelFormat::R16_FLOAT: | ||
| 98 | return "R16_FLOAT"; | ||
| 99 | case PixelFormat::R16_UNORM: | ||
| 100 | return "R16_UNORM"; | ||
| 101 | case PixelFormat::R16_SNORM: | ||
| 102 | return "R16_SNORM"; | ||
| 103 | case PixelFormat::R16_UINT: | ||
| 104 | return "R16_UINT"; | ||
| 105 | case PixelFormat::R16_SINT: | ||
| 106 | return "R16_SINT"; | ||
| 107 | case PixelFormat::R16G16_UNORM: | ||
| 108 | return "R16G16_UNORM"; | ||
| 109 | case PixelFormat::R16G16_FLOAT: | ||
| 110 | return "R16G16_FLOAT"; | ||
| 111 | case PixelFormat::R16G16_UINT: | ||
| 112 | return "R16G16_UINT"; | ||
| 113 | case PixelFormat::R16G16_SINT: | ||
| 114 | return "R16G16_SINT"; | ||
| 115 | case PixelFormat::R16G16_SNORM: | ||
| 116 | return "R16G16_SNORM"; | ||
| 117 | case PixelFormat::R32G32B32_FLOAT: | ||
| 118 | return "R32G32B32_FLOAT"; | ||
| 119 | case PixelFormat::A8B8G8R8_SRGB: | ||
| 120 | return "A8B8G8R8_SRGB"; | ||
| 121 | case PixelFormat::R8G8_UNORM: | ||
| 122 | return "R8G8_UNORM"; | ||
| 123 | case PixelFormat::R8G8_SNORM: | ||
| 124 | return "R8G8_SNORM"; | ||
| 125 | case PixelFormat::R8G8_SINT: | ||
| 126 | return "R8G8_SINT"; | ||
| 127 | case PixelFormat::R8G8_UINT: | ||
| 128 | return "R8G8_UINT"; | ||
| 129 | case PixelFormat::R32G32_UINT: | ||
| 130 | return "R32G32_UINT"; | ||
| 131 | case PixelFormat::R16G16B16X16_FLOAT: | ||
| 132 | return "R16G16B16X16_FLOAT"; | ||
| 133 | case PixelFormat::R32_UINT: | ||
| 134 | return "R32_UINT"; | ||
| 135 | case PixelFormat::R32_SINT: | ||
| 136 | return "R32_SINT"; | ||
| 137 | case PixelFormat::ASTC_2D_8X8_UNORM: | ||
| 138 | return "ASTC_2D_8X8_UNORM"; | ||
| 139 | case PixelFormat::ASTC_2D_8X5_UNORM: | ||
| 140 | return "ASTC_2D_8X5_UNORM"; | ||
| 141 | case PixelFormat::ASTC_2D_5X4_UNORM: | ||
| 142 | return "ASTC_2D_5X4_UNORM"; | ||
| 143 | case PixelFormat::B8G8R8A8_SRGB: | ||
| 144 | return "B8G8R8A8_SRGB"; | ||
| 145 | case PixelFormat::BC1_RGBA_SRGB: | ||
| 146 | return "BC1_RGBA_SRGB"; | ||
| 147 | case PixelFormat::BC2_SRGB: | ||
| 148 | return "BC2_SRGB"; | ||
| 149 | case PixelFormat::BC3_SRGB: | ||
| 150 | return "BC3_SRGB"; | ||
| 151 | case PixelFormat::BC7_SRGB: | ||
| 152 | return "BC7_SRGB"; | ||
| 153 | case PixelFormat::A4B4G4R4_UNORM: | ||
| 154 | return "A4B4G4R4_UNORM"; | ||
| 155 | case PixelFormat::ASTC_2D_4X4_SRGB: | ||
| 156 | return "ASTC_2D_4X4_SRGB"; | ||
| 157 | case PixelFormat::ASTC_2D_8X8_SRGB: | ||
| 158 | return "ASTC_2D_8X8_SRGB"; | ||
| 159 | case PixelFormat::ASTC_2D_8X5_SRGB: | ||
| 160 | return "ASTC_2D_8X5_SRGB"; | ||
| 161 | case PixelFormat::ASTC_2D_5X4_SRGB: | ||
| 162 | return "ASTC_2D_5X4_SRGB"; | ||
| 163 | case PixelFormat::ASTC_2D_5X5_UNORM: | ||
| 164 | return "ASTC_2D_5X5_UNORM"; | ||
| 165 | case PixelFormat::ASTC_2D_5X5_SRGB: | ||
| 166 | return "ASTC_2D_5X5_SRGB"; | ||
| 167 | case PixelFormat::ASTC_2D_10X8_UNORM: | ||
| 168 | return "ASTC_2D_10X8_UNORM"; | ||
| 169 | case PixelFormat::ASTC_2D_10X8_SRGB: | ||
| 170 | return "ASTC_2D_10X8_SRGB"; | ||
| 171 | case PixelFormat::ASTC_2D_6X6_UNORM: | ||
| 172 | return "ASTC_2D_6X6_UNORM"; | ||
| 173 | case PixelFormat::ASTC_2D_6X6_SRGB: | ||
| 174 | return "ASTC_2D_6X6_SRGB"; | ||
| 175 | case PixelFormat::ASTC_2D_10X10_UNORM: | ||
| 176 | return "ASTC_2D_10X10_UNORM"; | ||
| 177 | case PixelFormat::ASTC_2D_10X10_SRGB: | ||
| 178 | return "ASTC_2D_10X10_SRGB"; | ||
| 179 | case PixelFormat::ASTC_2D_12X12_UNORM: | ||
| 180 | return "ASTC_2D_12X12_UNORM"; | ||
| 181 | case PixelFormat::ASTC_2D_12X12_SRGB: | ||
| 182 | return "ASTC_2D_12X12_SRGB"; | ||
| 183 | case PixelFormat::ASTC_2D_8X6_UNORM: | ||
| 184 | return "ASTC_2D_8X6_UNORM"; | ||
| 185 | case PixelFormat::ASTC_2D_8X6_SRGB: | ||
| 186 | return "ASTC_2D_8X6_SRGB"; | ||
| 187 | case PixelFormat::ASTC_2D_6X5_UNORM: | ||
| 188 | return "ASTC_2D_6X5_UNORM"; | ||
| 189 | case PixelFormat::ASTC_2D_6X5_SRGB: | ||
| 190 | return "ASTC_2D_6X5_SRGB"; | ||
| 191 | case PixelFormat::E5B9G9R9_FLOAT: | ||
| 192 | return "E5B9G9R9_FLOAT"; | ||
| 193 | case PixelFormat::D32_FLOAT: | ||
| 194 | return "D32_FLOAT"; | ||
| 195 | case PixelFormat::D16_UNORM: | ||
| 196 | return "D16_UNORM"; | ||
| 197 | case PixelFormat::D24_UNORM_S8_UINT: | ||
| 198 | return "D24_UNORM_S8_UINT"; | ||
| 199 | case PixelFormat::S8_UINT_D24_UNORM: | ||
| 200 | return "S8_UINT_D24_UNORM"; | ||
| 201 | case PixelFormat::D32_FLOAT_S8_UINT: | ||
| 202 | return "D32_FLOAT_S8_UINT"; | ||
| 203 | case PixelFormat::MaxDepthStencilFormat: | ||
| 204 | case PixelFormat::Invalid: | ||
| 205 | return "Invalid"; | ||
| 206 | } | ||
| 207 | return "Invalid"; | ||
| 208 | }(); | ||
| 209 | return formatter<string_view>::format(name, ctx); | ||
| 210 | } | ||
| 211 | }; | ||
| 212 | |||
| 213 | template <> | ||
| 214 | struct fmt::formatter<VideoCommon::ImageType> : fmt::formatter<fmt::string_view> { | ||
| 215 | template <typename FormatContext> | ||
| 216 | auto format(VideoCommon::ImageType type, FormatContext& ctx) { | ||
| 217 | const string_view name = [type] { | ||
| 218 | using VideoCommon::ImageType; | ||
| 219 | switch (type) { | ||
| 220 | case ImageType::e1D: | ||
| 221 | return "1D"; | ||
| 222 | case ImageType::e2D: | ||
| 223 | return "2D"; | ||
| 224 | case ImageType::e3D: | ||
| 225 | return "3D"; | ||
| 226 | case ImageType::Linear: | ||
| 227 | return "Linear"; | ||
| 228 | case ImageType::Buffer: | ||
| 229 | return "Buffer"; | ||
| 230 | } | ||
| 231 | return "Invalid"; | ||
| 232 | }(); | ||
| 233 | return formatter<string_view>::format(name, ctx); | ||
| 234 | } | ||
| 235 | }; | ||
| 236 | |||
| 237 | template <> | ||
| 238 | struct fmt::formatter<VideoCommon::Extent3D> { | ||
| 239 | constexpr auto parse(fmt::format_parse_context& ctx) { | ||
| 240 | return ctx.begin(); | ||
| 241 | } | ||
| 242 | |||
| 243 | template <typename FormatContext> | ||
| 244 | auto format(const VideoCommon::Extent3D& extent, FormatContext& ctx) { | ||
| 245 | return fmt::format_to(ctx.out(), "{{{}, {}, {}}}", extent.width, extent.height, | ||
| 246 | extent.depth); | ||
| 247 | } | ||
| 248 | }; | ||
| 249 | |||
| 250 | namespace VideoCommon { | ||
| 251 | |||
| 252 | struct ImageBase; | ||
| 253 | struct ImageViewBase; | ||
| 254 | struct RenderTargets; | ||
| 255 | |||
| 256 | [[nodiscard]] std::string Name(const ImageBase& image); | ||
| 257 | |||
| 258 | [[nodiscard]] std::string Name(const ImageViewBase& image_view, | ||
| 259 | std::optional<ImageViewType> type = std::nullopt); | ||
| 260 | |||
| 261 | [[nodiscard]] std::string Name(const RenderTargets& render_targets); | ||
| 262 | |||
| 263 | } // namespace VideoCommon | ||
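Editor's note: with these specializations in scope, the texture-cache types drop straight into fmt format strings. A minimal sketch, assuming Extent3D is the aggregate used with designated initializers elsewhere in this change:

    #include <fmt/format.h>
    #include "video_core/texture_cache/formatter.h"

    const std::string s = fmt::format(
        "format={} type={} size={}", VideoCore::Surface::PixelFormat::R8_UNORM,
        VideoCommon::ImageType::e2D,
        VideoCommon::Extent3D{.width = 64, .height = 64, .depth = 1});
    // s == "format=R8_UNORM type=2D size={64, 64, 1}"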
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp new file mode 100644 index 000000000..959b3f115 --- /dev/null +++ b/src/video_core/texture_cache/image_base.cpp | |||
| @@ -0,0 +1,218 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <optional> | ||
| 7 | #include <utility> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/texture_cache/formatter.h" | ||
| 13 | #include "video_core/texture_cache/image_base.h" | ||
| 14 | #include "video_core/texture_cache/image_view_info.h" | ||
| 15 | #include "video_core/texture_cache/util.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | using VideoCore::Surface::DefaultBlockHeight; | ||
| 20 | using VideoCore::Surface::DefaultBlockWidth; | ||
| 21 | |||
| 22 | namespace { | ||
| 23 | /// Returns the base layer and mip level offset | ||
| 24 | [[nodiscard]] std::pair<s32, s32> LayerMipOffset(s32 diff, u32 layer_stride) { | ||
| 25 | if (layer_stride == 0) { | ||
| 26 | return {0, diff}; | ||
| 27 | } else { | ||
| 28 | return {diff / layer_stride, diff % layer_stride}; | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | [[nodiscard]] bool ValidateLayers(const SubresourceLayers& layers, const ImageInfo& info) { | ||
| 33 | return layers.base_level < info.resources.levels && | ||
| 34 | layers.base_layer + layers.num_layers <= info.resources.layers; | ||
| 35 | } | ||
| 36 | |||
| 37 | [[nodiscard]] bool ValidateCopy(const ImageCopy& copy, const ImageInfo& dst, const ImageInfo& src) { | ||
| 38 | const Extent3D src_size = MipSize(src.size, copy.src_subresource.base_level); | ||
| 39 | const Extent3D dst_size = MipSize(dst.size, copy.dst_subresource.base_level); | ||
| 40 | if (!ValidateLayers(copy.src_subresource, src)) { | ||
| 41 | return false; | ||
| 42 | } | ||
| 43 | if (!ValidateLayers(copy.dst_subresource, dst)) { | ||
| 44 | return false; | ||
| 45 | } | ||
| 46 | if (copy.src_offset.x + copy.extent.width > src_size.width || | ||
| 47 | copy.src_offset.y + copy.extent.height > src_size.height || | ||
| 48 | copy.src_offset.z + copy.extent.depth > src_size.depth) { | ||
| 49 | return false; | ||
| 50 | } | ||
| 51 | if (copy.dst_offset.x + copy.extent.width > dst_size.width || | ||
| 52 | copy.dst_offset.y + copy.extent.height > dst_size.height || | ||
| 53 | copy.dst_offset.z + copy.extent.depth > dst_size.depth) { | ||
| 54 | return false; | ||
| 55 | } | ||
| 56 | return true; | ||
| 57 | } | ||
| 58 | } // Anonymous namespace | ||
| 59 | |||
| 60 | ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) | ||
| 61 | : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, | ||
| 62 | unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, | ||
| 63 | converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_}, | ||
| 64 | cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes}, | ||
| 65 | mip_level_offsets{CalculateMipLevelOffsets(info)} { | ||
| 66 | if (info.type == ImageType::e3D) { | ||
| 67 | slice_offsets = CalculateSliceOffsets(info); | ||
| 68 | slice_subresources = CalculateSliceSubresources(info); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { | ||
| 73 | if (other_addr < gpu_addr) { | ||
| 74 | // Subresource address can't be lower than the base | ||
| 75 | return std::nullopt; | ||
| 76 | } | ||
| 77 | const u32 diff = static_cast<u32>(other_addr - gpu_addr); | ||
| 78 | if (diff > guest_size_bytes) { | ||
| 79 | // This can happen when two CPU addresses are used for different GPU addresses | ||
| 80 | return std::nullopt; | ||
| 81 | } | ||
| 82 | if (info.type != ImageType::e3D) { | ||
| 83 | const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride); | ||
| 84 | const auto end = mip_level_offsets.begin() + info.resources.levels; | ||
| 85 | const auto it = std::find(mip_level_offsets.begin(), end, mip_offset); | ||
| 86 | if (layer > info.resources.layers || it == end) { | ||
| 87 | return std::nullopt; | ||
| 88 | } | ||
| 89 | return SubresourceBase{ | ||
| 90 | .level = static_cast<s32>(std::distance(mip_level_offsets.begin(), it)), | ||
| 91 | .layer = layer, | ||
| 92 | }; | ||
| 93 | } else { | ||
| 94 | // TODO: Consider using binary_search after a threshold | ||
| 95 | const auto it = std::ranges::find(slice_offsets, diff); | ||
| 96 | if (it == slice_offsets.cend()) { | ||
| 97 | return std::nullopt; | ||
| 98 | } | ||
| 99 | return slice_subresources[std::distance(slice_offsets.begin(), it)]; | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | ImageViewId ImageBase::FindView(const ImageViewInfo& view_info) const noexcept { | ||
| 104 | const auto it = std::ranges::find(image_view_infos, view_info); | ||
| 105 | if (it == image_view_infos.end()) { | ||
| 106 | return ImageViewId{}; | ||
| 107 | } | ||
| 108 | return image_view_ids[std::distance(image_view_infos.begin(), it)]; | ||
| 109 | } | ||
| 110 | |||
| 111 | void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id) { | ||
| 112 | image_view_infos.push_back(view_info); | ||
| 113 | image_view_ids.push_back(image_view_id); | ||
| 114 | } | ||
| 115 | |||
| 116 | void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { | ||
| 117 | static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; | ||
| 118 | ASSERT(lhs.info.type == rhs.info.type); | ||
| 119 | std::optional<SubresourceBase> base; | ||
| 120 | if (lhs.info.type == ImageType::Linear) { | ||
| 121 | base = SubresourceBase{.level = 0, .layer = 0}; | ||
| 122 | } else { | ||
| 123 | // We are passing relaxed formats as an option, so whether views are broken does not matter | ||
| 124 | static constexpr bool broken_views = false; | ||
| 125 | base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views); | ||
| 126 | } | ||
| 127 | if (!base) { | ||
| 128 | LOG_ERROR(HW_GPU, "Image alias should have been flipped"); | ||
| 129 | return; | ||
| 130 | } | ||
| 131 | const PixelFormat lhs_format = lhs.info.format; | ||
| 132 | const PixelFormat rhs_format = rhs.info.format; | ||
| 133 | const Extent2D lhs_block{ | ||
| 134 | .width = DefaultBlockWidth(lhs_format), | ||
| 135 | .height = DefaultBlockHeight(lhs_format), | ||
| 136 | }; | ||
| 137 | const Extent2D rhs_block{ | ||
| 138 | .width = DefaultBlockWidth(rhs_format), | ||
| 139 | .height = DefaultBlockHeight(rhs_format), | ||
| 140 | }; | ||
| 141 | const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1; | ||
| 142 | const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1; | ||
| 143 | if (is_lhs_compressed && is_rhs_compressed) { | ||
| 144 | LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented"); | ||
| 145 | return; | ||
| 146 | } | ||
| 147 | const s32 lhs_mips = lhs.info.resources.levels; | ||
| 148 | const s32 rhs_mips = rhs.info.resources.levels; | ||
| 149 | const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips); | ||
| 150 | AliasedImage lhs_alias; | ||
| 151 | AliasedImage rhs_alias; | ||
| 152 | lhs_alias.id = rhs_id; | ||
| 153 | rhs_alias.id = lhs_id; | ||
| 154 | lhs_alias.copies.reserve(num_mips); | ||
| 155 | rhs_alias.copies.reserve(num_mips); | ||
| 156 | for (s32 mip_level = 0; mip_level < num_mips; ++mip_level) { | ||
| 157 | Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level); | ||
| 158 | Extent3D rhs_size = MipSize(rhs.info.size, mip_level); | ||
| 159 | if (is_lhs_compressed) { | ||
| 160 | lhs_size.width /= lhs_block.width; | ||
| 161 | lhs_size.height /= lhs_block.height; | ||
| 162 | } | ||
| 163 | if (is_rhs_compressed) { | ||
| 164 | rhs_size.width /= rhs_block.width; | ||
| 165 | rhs_size.height /= rhs_block.height; | ||
| 166 | } | ||
| 167 | const Extent3D copy_size{ | ||
| 168 | .width = std::min(lhs_size.width, rhs_size.width), | ||
| 169 | .height = std::min(lhs_size.height, rhs_size.height), | ||
| 170 | .depth = std::min(lhs_size.depth, rhs_size.depth), | ||
| 171 | }; | ||
| 172 | if (copy_size.width == 0 || copy_size.height == 0) { | ||
| 173 | LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased."); | ||
| 174 | continue; | ||
| 175 | } | ||
| 176 | const bool is_lhs_3d = lhs.info.type == ImageType::e3D; | ||
| 177 | const bool is_rhs_3d = rhs.info.type == ImageType::e3D; | ||
| 178 | const Offset3D lhs_offset{0, 0, 0}; | ||
| 179 | const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0}; | ||
| 180 | const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer; | ||
| 181 | const s32 rhs_layers = is_rhs_3d ? 1 : rhs.info.resources.layers; | ||
| 182 | const s32 num_layers = std::min(lhs_layers, rhs_layers); | ||
| 183 | const SubresourceLayers lhs_subresource{ | ||
| 184 | .base_level = mip_level, | ||
| 185 | .base_layer = 0, | ||
| 186 | .num_layers = num_layers, | ||
| 187 | }; | ||
| 188 | const SubresourceLayers rhs_subresource{ | ||
| 189 | .base_level = base->level + mip_level, | ||
| 190 | .base_layer = is_rhs_3d ? 0 : base->layer, | ||
| 191 | .num_layers = num_layers, | ||
| 192 | }; | ||
| 193 | [[maybe_unused]] const ImageCopy& to_lhs_copy = lhs_alias.copies.emplace_back(ImageCopy{ | ||
| 194 | .src_subresource = lhs_subresource, | ||
| 195 | .dst_subresource = rhs_subresource, | ||
| 196 | .src_offset = lhs_offset, | ||
| 197 | .dst_offset = rhs_offset, | ||
| 198 | .extent = copy_size, | ||
| 199 | }); | ||
| 200 | [[maybe_unused]] const ImageCopy& to_rhs_copy = rhs_alias.copies.emplace_back(ImageCopy{ | ||
| 201 | .src_subresource = rhs_subresource, | ||
| 202 | .dst_subresource = lhs_subresource, | ||
| 203 | .src_offset = rhs_offset, | ||
| 204 | .dst_offset = lhs_offset, | ||
| 205 | .extent = copy_size, | ||
| 206 | }); | ||
| 207 | ASSERT_MSG(ValidateCopy(to_lhs_copy, lhs.info, rhs.info), "Invalid RHS to LHS copy"); | ||
| 208 | ASSERT_MSG(ValidateCopy(to_rhs_copy, rhs.info, lhs.info), "Invalid LHS to RHS copy"); | ||
| 209 | } | ||
| 210 | ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty()); | ||
| 211 | if (lhs_alias.copies.empty()) { | ||
| 212 | return; | ||
| 213 | } | ||
| 214 | lhs.aliased_images.push_back(std::move(lhs_alias)); | ||
| 215 | rhs.aliased_images.push_back(std::move(rhs_alias)); | ||
| 216 | } | ||
| 217 | |||
| 218 | } // namespace VideoCommon | ||
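Editor's note: a worked example of the LayerMipOffset decomposition that TryFindBase applies to non-3D images, using made-up numbers:

    // Assume layer_stride = 0x10000 and mip_level_offsets = {0x0, 0xC000}.
    // For an address 0x2C000 bytes past the image base:
    //   layer      = 0x2C000 / 0x10000 = 2
    //   mip_offset = 0x2C000 % 0x10000 = 0xC000, which matches level 1
    // so TryFindBase returns SubresourceBase{.level = 1, .layer = 2}.
    // A layer_stride of 0 short-circuits to layer 0 with the whole diff taken
    // as a mip offset; 3D images search slice_offsets instead.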
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h new file mode 100644 index 000000000..b7f3b7e43 --- /dev/null +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -0,0 +1,83 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/common_funcs.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/texture_cache/image_info.h" | ||
| 14 | #include "video_core/texture_cache/image_view_info.h" | ||
| 15 | #include "video_core/texture_cache/types.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | enum class ImageFlagBits : u32 { | ||
| 20 | AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU | ||
| 21 | Converted = 1 << 1, ///< Guest format is not supported natively and it has to be converted | ||
| 22 | CpuModified = 1 << 2, ///< Contents have been modified from the CPU | ||
| 23 | GpuModified = 1 << 3, ///< Contents have been modified from the GPU | ||
| 24 | Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT | ||
| 25 | Strong = 1 << 5, ///< Exists in the image table, the dimensions can be trusted | ||
| 26 | Registered = 1 << 6, ///< True when the image is registered | ||
| 27 | Picked = 1 << 7, ///< Temporary flag to mark the image as picked | ||
| 28 | }; | ||
| 29 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) | ||
| 30 | |||
| 31 | struct ImageViewInfo; | ||
| 32 | |||
| 33 | struct AliasedImage { | ||
| 34 | std::vector<ImageCopy> copies; | ||
| 35 | ImageId id; | ||
| 36 | }; | ||
| 37 | |||
| 38 | struct ImageBase { | ||
| 39 | explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | ||
| 40 | |||
| 41 | [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept; | ||
| 42 | |||
| 43 | [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept; | ||
| 44 | |||
| 45 | void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); | ||
| 46 | |||
| 47 | [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { | ||
| 48 | const VAddr overlap_end = overlap_cpu_addr + overlap_size; | ||
| 49 | return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; | ||
| 50 | } | ||
| 51 | |||
| 52 | ImageInfo info; | ||
| 53 | |||
| 54 | u32 guest_size_bytes = 0; | ||
| 55 | u32 unswizzled_size_bytes = 0; | ||
| 56 | u32 converted_size_bytes = 0; | ||
| 57 | ImageFlagBits flags = ImageFlagBits::CpuModified; | ||
| 58 | |||
| 59 | GPUVAddr gpu_addr = 0; | ||
| 60 | VAddr cpu_addr = 0; | ||
| 61 | VAddr cpu_addr_end = 0; | ||
| 62 | |||
| 63 | u64 modification_tick = 0; | ||
| 64 | u64 frame_tick = 0; | ||
| 65 | |||
| 66 | std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; | ||
| 67 | |||
| 68 | std::vector<ImageViewInfo> image_view_infos; | ||
| 69 | std::vector<ImageViewId> image_view_ids; | ||
| 70 | |||
| 71 | std::vector<u32> slice_offsets; | ||
| 72 | std::vector<SubresourceBase> slice_subresources; | ||
| 73 | |||
| 74 | std::vector<AliasedImage> aliased_images; | ||
| 75 | }; | ||
| 76 | |||
| 77 | struct ImageAllocBase { | ||
| 78 | std::vector<ImageId> images; | ||
| 79 | }; | ||
| 80 | |||
| 81 | void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id); | ||
| 82 | |||
| 83 | } // namespace VideoCommon | ||
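Editor's note: Overlaps() above is the standard half-open interval intersection test: [cpu_addr, cpu_addr_end) and [overlap_cpu_addr, overlap_cpu_addr + overlap_size) intersect exactly when each range starts before the other ends. Two made-up cases:

    // image spans [0x1000, 0x3000)
    image.Overlaps(0x2F00, 0x100);  // true:  [0x2F00, 0x3000) shares the last 0x100 bytes
    image.Overlaps(0x3000, 0x100);  // false: [0x3000, 0x3100) merely touches the end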
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp new file mode 100644 index 000000000..64fd7010a --- /dev/null +++ b/src/video_core/texture_cache/image_info.cpp | |||
| @@ -0,0 +1,189 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "video_core/surface.h" | ||
| 7 | #include "video_core/texture_cache/format_lookup_table.h" | ||
| 8 | #include "video_core/texture_cache/image_info.h" | ||
| 9 | #include "video_core/texture_cache/samples_helper.h" | ||
| 10 | #include "video_core/texture_cache/types.h" | ||
| 11 | #include "video_core/texture_cache/util.h" | ||
| 12 | #include "video_core/textures/texture.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | using Tegra::Texture::TextureType; | ||
| 17 | using Tegra::Texture::TICEntry; | ||
| 18 | using VideoCore::Surface::PixelFormat; | ||
| 19 | |||
| 20 | ImageInfo::ImageInfo(const TICEntry& config) noexcept { | ||
| 21 | format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, | ||
| 22 | config.a_type, config.srgb_conversion); | ||
| 23 | num_samples = NumSamples(config.msaa_mode); | ||
| 24 | resources.levels = config.max_mip_level + 1; | ||
| 25 | if (config.IsPitchLinear()) { | ||
| 26 | pitch = config.Pitch(); | ||
| 27 | } else if (config.IsBlockLinear()) { | ||
| 28 | block = Extent3D{ | ||
| 29 | .width = config.block_width, | ||
| 30 | .height = config.block_height, | ||
| 31 | .depth = config.block_depth, | ||
| 32 | }; | ||
| 33 | } | ||
| 34 | tile_width_spacing = config.tile_width_spacing; | ||
| 35 | if (config.texture_type != TextureType::Texture2D && | ||
| 36 | config.texture_type != TextureType::Texture2DNoMipmap) { | ||
| 37 | ASSERT(!config.IsPitchLinear()); | ||
| 38 | } | ||
| 39 | switch (config.texture_type) { | ||
| 40 | case TextureType::Texture1D: | ||
| 41 | ASSERT(config.BaseLayer() == 0); | ||
| 42 | type = ImageType::e1D; | ||
| 43 | size.width = config.Width(); | ||
| 44 | break; | ||
| 45 | case TextureType::Texture1DArray: | ||
| 46 | UNIMPLEMENTED_IF(config.BaseLayer() != 0); | ||
| 47 | type = ImageType::e1D; | ||
| 48 | size.width = config.Width(); | ||
| 49 | resources.layers = config.Depth(); | ||
| 50 | break; | ||
| 51 | case TextureType::Texture2D: | ||
| 52 | case TextureType::Texture2DNoMipmap: | ||
| 53 | ASSERT(config.Depth() == 1); | ||
| 54 | type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D; | ||
| 55 | size.width = config.Width(); | ||
| 56 | size.height = config.Height(); | ||
| 57 | resources.layers = config.BaseLayer() + 1; | ||
| 58 | break; | ||
| 59 | case TextureType::Texture2DArray: | ||
| 60 | type = ImageType::e2D; | ||
| 61 | size.width = config.Width(); | ||
| 62 | size.height = config.Height(); | ||
| 63 | resources.layers = config.BaseLayer() + config.Depth(); | ||
| 64 | break; | ||
| 65 | case TextureType::TextureCubemap: | ||
| 66 | ASSERT(config.Depth() == 1); | ||
| 67 | type = ImageType::e2D; | ||
| 68 | size.width = config.Width(); | ||
| 69 | size.height = config.Height(); | ||
| 70 | resources.layers = config.BaseLayer() + 6; | ||
| 71 | break; | ||
| 72 | case TextureType::TextureCubeArray: | ||
| 73 | UNIMPLEMENTED_IF(config.load_store_hint != 0); | ||
| 74 | type = ImageType::e2D; | ||
| 75 | size.width = config.Width(); | ||
| 76 | size.height = config.Height(); | ||
| 77 | resources.layers = config.BaseLayer() + config.Depth() * 6; | ||
| 78 | break; | ||
| 79 | case TextureType::Texture3D: | ||
| 80 | ASSERT(config.BaseLayer() == 0); | ||
| 81 | type = ImageType::e3D; | ||
| 82 | size.width = config.Width(); | ||
| 83 | size.height = config.Height(); | ||
| 84 | size.depth = config.Depth(); | ||
| 85 | break; | ||
| 86 | case TextureType::Texture1DBuffer: | ||
| 87 | type = ImageType::Buffer; | ||
| 88 | size.width = config.Width(); | ||
| 89 | break; | ||
| 90 | default: | ||
| 91 | UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); | ||
| 92 | break; | ||
| 93 | } | ||
| 94 | if (type != ImageType::Linear) { | ||
| 95 | // FIXME: Call this without passing *this | ||
| 96 | layer_stride = CalculateLayerStride(*this); | ||
| 97 | maybe_unaligned_layer_stride = CalculateLayerSize(*this); | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept { | ||
| 102 | const auto& rt = regs.rt[index]; | ||
| 103 | format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format); | ||
| 104 | if (rt.tile_mode.is_pitch_linear) { | ||
| 105 | ASSERT(rt.tile_mode.is_3d == 0); | ||
| 106 | type = ImageType::Linear; | ||
| 107 | pitch = rt.width; | ||
| 108 | size = Extent3D{ | ||
| 109 | .width = pitch / BytesPerBlock(format), | ||
| 110 | .height = rt.height, | ||
| 111 | .depth = 1, | ||
| 112 | }; | ||
| 113 | return; | ||
| 114 | } | ||
| 115 | size.width = rt.width; | ||
| 116 | size.height = rt.height; | ||
| 117 | layer_stride = rt.layer_stride * 4; | ||
| 118 | maybe_unaligned_layer_stride = layer_stride; | ||
| 119 | num_samples = NumSamples(regs.multisample_mode); | ||
| 120 | block = Extent3D{ | ||
| 121 | .width = rt.tile_mode.block_width, | ||
| 122 | .height = rt.tile_mode.block_height, | ||
| 123 | .depth = rt.tile_mode.block_depth, | ||
| 124 | }; | ||
| 125 | if (rt.tile_mode.is_3d) { | ||
| 126 | type = ImageType::e3D; | ||
| 127 | size.depth = rt.depth; | ||
| 128 | } else { | ||
| 129 | type = ImageType::e2D; | ||
| 130 | resources.layers = rt.depth; | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { | ||
| 135 | format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format); | ||
| 136 | size.width = regs.zeta_width; | ||
| 137 | size.height = regs.zeta_height; | ||
| 138 | resources.levels = 1; | ||
| 139 | layer_stride = regs.zeta.layer_stride * 4; | ||
| 140 | maybe_unaligned_layer_stride = layer_stride; | ||
| 141 | num_samples = NumSamples(regs.multisample_mode); | ||
| 142 | block = Extent3D{ | ||
| 143 | .width = regs.zeta.tile_mode.block_width, | ||
| 144 | .height = regs.zeta.tile_mode.block_height, | ||
| 145 | .depth = regs.zeta.tile_mode.block_depth, | ||
| 146 | }; | ||
| 147 | if (regs.zeta.tile_mode.is_pitch_linear) { | ||
| 148 | ASSERT(regs.zeta.tile_mode.is_3d == 0); | ||
| 149 | type = ImageType::Linear; | ||
| 150 | pitch = size.width * BytesPerBlock(format); | ||
| 151 | } else if (regs.zeta.tile_mode.is_3d) { | ||
| 152 | ASSERT(regs.zeta.tile_mode.is_pitch_linear == 0); | ||
| 153 | type = ImageType::e3D; | ||
| 154 | size.depth = regs.zeta_depth; | ||
| 155 | } else { | ||
| 156 | type = ImageType::e2D; | ||
| 157 | resources.layers = regs.zeta_depth; | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { | ||
| 162 | UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero"); | ||
| 163 | format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format); | ||
| 164 | if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) { | ||
| 165 | type = ImageType::Linear; | ||
| 166 | size = Extent3D{ | ||
| 167 | .width = config.pitch / VideoCore::Surface::BytesPerBlock(format), | ||
| 168 | .height = config.height, | ||
| 169 | .depth = 1, | ||
| 170 | }; | ||
| 171 | pitch = config.pitch; | ||
| 172 | } else { | ||
| 173 | type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D; | ||
| 174 | block = Extent3D{ | ||
| 175 | .width = config.block_width, | ||
| 176 | .height = config.block_height, | ||
| 177 | .depth = config.block_depth, | ||
| 178 | }; | ||
| 179 | // 3D blits with more than one slice are not implemented for now | ||
| 180 | // Render to individual slices | ||
| 181 | size = Extent3D{ | ||
| 182 | .width = config.width, | ||
| 183 | .height = config.height, | ||
| 184 | .depth = 1, | ||
| 185 | }; | ||
| 186 | } | ||
| 187 | } | ||
| 188 | |||
| 189 | } // namespace VideoCommon | ||
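Editor's note: a worked example of the layer accounting above: a TextureCubeArray TIC with BaseLayer() == 0 and Depth() == 4 (four cubes) ends up as

    resources.layers = 0 + 4 * 6 = 24

2D array layers, one per face, while a plain TextureCubemap contributes a fixed 6. The BaseLayer() term presumably sizes the backing image to cover the layers below the view's base so that layer indexing stays valid.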
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h new file mode 100644 index 000000000..5049fc36e --- /dev/null +++ b/src/video_core/texture_cache/image_info.h | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/engines/fermi_2d.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 9 | #include "video_core/surface.h" | ||
| 10 | #include "video_core/texture_cache/types.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | using Tegra::Texture::TICEntry; | ||
| 15 | using VideoCore::Surface::PixelFormat; | ||
| 16 | |||
| 17 | struct ImageInfo { | ||
| 18 | explicit ImageInfo() = default; | ||
| 19 | explicit ImageInfo(const TICEntry& config) noexcept; | ||
| 20 | explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept; | ||
| 21 | explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept; | ||
| 22 | explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept; | ||
| 23 | |||
| 24 | PixelFormat format = PixelFormat::Invalid; | ||
| 25 | ImageType type = ImageType::e1D; | ||
| 26 | SubresourceExtent resources; | ||
| 27 | Extent3D size{1, 1, 1}; | ||
| 28 | union { | ||
| 29 | Extent3D block{0, 0, 0}; | ||
| 30 | u32 pitch; | ||
| 31 | }; | ||
| 32 | u32 layer_stride = 0; | ||
| 33 | u32 maybe_unaligned_layer_stride = 0; | ||
| 34 | u32 num_samples = 1; | ||
| 35 | u32 tile_width_spacing = 0; | ||
| 36 | }; | ||
| 37 | |||
| 38 | } // namespace VideoCommon | ||
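Note that block and pitch share storage through an anonymous union, with type acting as the discriminant: linear images carry a pitch, block-linear images carry block dimensions. A minimal sketch of how a consumer must branch before touching either member (the helper is hypothetical, not part of this change):

    #include "video_core/texture_cache/image_info.h"

    // Hypothetical helper: only read the union member selected by 'type'.
    u32 BlockHeight(const VideoCommon::ImageInfo& info) {
        if (info.type == VideoCommon::ImageType::Linear) {
            return 0; // Linear images store 'pitch' in this union slot, not 'block'
        }
        return info.block.height; // Block-linear images store block dimensions here
    }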
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp new file mode 100644 index 000000000..18f72e508 --- /dev/null +++ b/src/video_core/texture_cache/image_view_base.cpp | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "core/settings.h" | ||
| 9 | #include "video_core/compatible_formats.h" | ||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/texture_cache/formatter.h" | ||
| 12 | #include "video_core/texture_cache/image_info.h" | ||
| 13 | #include "video_core/texture_cache/image_view_base.h" | ||
| 14 | #include "video_core/texture_cache/image_view_info.h" | ||
| 15 | #include "video_core/texture_cache/types.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, | ||
| 20 | ImageId image_id_) | ||
| 21 | : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range}, | ||
| 22 | size{ | ||
| 23 | .width = std::max(image_info.size.width >> range.base.level, 1u), | ||
| 24 | .height = std::max(image_info.size.height >> range.base.level, 1u), | ||
| 25 | .depth = std::max(image_info.size.depth >> range.base.level, 1u), | ||
| 26 | } { | ||
| 27 | ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false), | ||
| 28 | "Image view format {} is incompatible with image format {}", info.format, | ||
| 29 | image_info.format); | ||
| 30 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | ||
| 31 | if (image_info.type == ImageType::Linear && is_async) { | ||
| 32 | flags |= ImageViewFlagBits::PreemptiveDownload; | ||
| 33 | } | ||
| 34 | if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) { | ||
| 35 | flags |= ImageViewFlagBits::Slice; | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | ImageViewBase::ImageViewBase(const NullImageParams&) {} | ||
| 40 | |||
| 41 | } // namespace VideoCommon | ||
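The view extent above follows the usual mip chain rule: each dimension is shifted right by the base level and clamped to one. A self-contained check of that rule, with illustrative values only:

    #include <algorithm>
    #include <cstdint>

    // Mip extent rule used by the constructor: max(dim >> level, 1).
    constexpr std::uint32_t MipExtent(std::uint32_t dim, std::int32_t level) {
        return std::max(dim >> level, std::uint32_t{1});
    }
    static_assert(MipExtent(35, 3) == 4); // 35 / 8, rounded down
    static_assert(MipExtent(12, 3) == 1); // 12 / 8, rounded down
    static_assert(MipExtent(1, 3) == 1);  // clamped to the minimum extent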
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h new file mode 100644 index 000000000..73954167e --- /dev/null +++ b/src/video_core/texture_cache/image_view_base.h | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_funcs.h" | ||
| 8 | #include "video_core/surface.h" | ||
| 9 | #include "video_core/texture_cache/types.h" | ||
| 10 | |||
| 11 | namespace VideoCommon { | ||
| 12 | |||
| 13 | using VideoCore::Surface::PixelFormat; | ||
| 14 | |||
| 15 | struct ImageViewInfo; | ||
| 16 | struct ImageInfo; | ||
| 17 | |||
| 18 | struct NullImageParams {}; | ||
| 19 | |||
| 20 | enum class ImageViewFlagBits : u16 { | ||
| 21 | PreemptiveDownload = 1 << 0, | ||
| 22 | Strong = 1 << 1, | ||
| 23 | Slice = 1 << 2, | ||
| 24 | }; | ||
| 25 | DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) | ||
| 26 | |||
| 27 | struct ImageViewBase { | ||
| 28 | explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, | ||
| 29 | ImageId image_id); | ||
| 30 | explicit ImageViewBase(const NullImageParams&); | ||
| 31 | |||
| 32 | [[nodiscard]] bool IsBuffer() const noexcept { | ||
| 33 | return type == ImageViewType::Buffer; | ||
| 34 | } | ||
| 35 | |||
| 36 | ImageId image_id{}; | ||
| 37 | PixelFormat format{}; | ||
| 38 | ImageViewType type{}; | ||
| 39 | SubresourceRange range; | ||
| 40 | Extent3D size{0, 0, 0}; | ||
| 41 | ImageViewFlagBits flags{}; | ||
| 42 | |||
| 43 | u64 invalidation_tick = 0; | ||
| 44 | u64 modification_tick = 0; | ||
| 45 | }; | ||
| 46 | |||
| 47 | } // namespace VideoCommon | ||
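ImageViewFlagBits is meant to be used as a bitmask; DECLARE_ENUM_FLAG_OPERATORS generates the bitwise operators that make the flags |= ... lines in image_view_base.cpp compile. A short usage sketch, assuming the macro provides the usual |, |= and & overloads as in common/common_funcs.h:

    using VideoCommon::ImageViewFlagBits;

    ImageViewFlagBits flags{};
    flags |= ImageViewFlagBits::Slice;
    const bool is_slice = (flags & ImageViewFlagBits::Slice) != ImageViewFlagBits{};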
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp new file mode 100644 index 000000000..faf5b151f --- /dev/null +++ b/src/video_core/texture_cache/image_view_info.cpp | |||
| @@ -0,0 +1,88 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <limits> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "video_core/texture_cache/image_view_info.h" | ||
| 9 | #include "video_core/texture_cache/texture_cache.h" | ||
| 10 | #include "video_core/texture_cache/types.h" | ||
| 11 | #include "video_core/textures/texture.h" | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | namespace { | ||
| 16 | |||
| 17 | constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max(); | ||
| 18 | |||
| 19 | [[nodiscard]] u8 CastSwizzle(SwizzleSource source) { | ||
| 20 | const u8 casted = static_cast<u8>(source); | ||
| 21 | ASSERT(static_cast<SwizzleSource>(casted) == source); | ||
| 22 | return casted; | ||
| 23 | } | ||
| 24 | |||
| 25 | } // Anonymous namespace | ||
| 26 | |||
| 27 | ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept | ||
| 28 | : format{PixelFormatFromTIC(config)}, x_source{CastSwizzle(config.x_source)}, | ||
| 29 | y_source{CastSwizzle(config.y_source)}, z_source{CastSwizzle(config.z_source)}, | ||
| 30 | w_source{CastSwizzle(config.w_source)} { | ||
| 31 | range.base = SubresourceBase{ | ||
| 32 | .level = static_cast<s32>(config.res_min_mip_level), | ||
| 33 | .layer = base_layer, | ||
| 34 | }; | ||
| 35 | range.extent.levels = config.res_max_mip_level - config.res_min_mip_level + 1; | ||
| 36 | |||
| 37 | switch (config.texture_type) { | ||
| 38 | case TextureType::Texture1D: | ||
| 39 | ASSERT(config.Height() == 1); | ||
| 40 | ASSERT(config.Depth() == 1); | ||
| 41 | type = ImageViewType::e1D; | ||
| 42 | break; | ||
| 43 | case TextureType::Texture2D: | ||
| 44 | case TextureType::Texture2DNoMipmap: | ||
| 45 | ASSERT(config.Depth() == 1); | ||
| 46 | type = config.normalized_coords ? ImageViewType::e2D : ImageViewType::Rect; | ||
| 47 | break; | ||
| 48 | case TextureType::Texture3D: | ||
| 49 | type = ImageViewType::e3D; | ||
| 50 | break; | ||
| 51 | case TextureType::TextureCubemap: | ||
| 52 | ASSERT(config.Depth() == 1); | ||
| 53 | type = ImageViewType::Cube; | ||
| 54 | range.extent.layers = 6; | ||
| 55 | break; | ||
| 56 | case TextureType::Texture1DArray: | ||
| 57 | type = ImageViewType::e1DArray; | ||
| 58 | range.extent.layers = config.Depth(); | ||
| 59 | break; | ||
| 60 | case TextureType::Texture2DArray: | ||
| 61 | type = ImageViewType::e2DArray; | ||
| 62 | range.extent.layers = config.Depth(); | ||
| 63 | break; | ||
| 64 | case TextureType::Texture1DBuffer: | ||
| 65 | type = ImageViewType::Buffer; | ||
| 66 | break; | ||
| 67 | case TextureType::TextureCubeArray: | ||
| 68 | type = ImageViewType::CubeArray; | ||
| 69 | range.extent.layers = config.Depth() * 6; | ||
| 70 | break; | ||
| 71 | default: | ||
| 72 | UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | ImageViewInfo::ImageViewInfo(ImageViewType type_, PixelFormat format_, | ||
| 78 | SubresourceRange range_) noexcept | ||
| 79 | : type{type_}, format{format_}, range{range_}, x_source{RENDER_TARGET_SWIZZLE}, | ||
| 80 | y_source{RENDER_TARGET_SWIZZLE}, z_source{RENDER_TARGET_SWIZZLE}, | ||
| 81 | w_source{RENDER_TARGET_SWIZZLE} {} | ||
| 82 | |||
| 83 | bool ImageViewInfo::IsRenderTarget() const noexcept { | ||
| 84 | return x_source == RENDER_TARGET_SWIZZLE && y_source == RENDER_TARGET_SWIZZLE && | ||
| 85 | z_source == RENDER_TARGET_SWIZZLE && w_source == RENDER_TARGET_SWIZZLE; | ||
| 86 | } | ||
| 87 | |||
| 88 | } // namespace VideoCommon | ||
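IsRenderTarget relies on 0xFF being unreachable as a real swizzle: TIC swizzle sources are small enumerators, and CastSwizzle asserts that every value round-trips through u8, so the four-byte sentinel written by the render-target constructor can never be produced from a TICEntry. A compile-time sketch of that invariant (assuming the SwizzleSource enumerators top out at OneFloat = 7, as declared in video_core/textures/texture.h):

    #include <limits>
    #include "common/common_types.h"
    #include "video_core/textures/texture.h"

    // Mirrors the file-local RENDER_TARGET_SWIZZLE constant above.
    constexpr u8 SENTINEL = std::numeric_limits<u8>::max();
    static_assert(SENTINEL > static_cast<u8>(Tegra::Texture::SwizzleSource::OneFloat));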
diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h new file mode 100644 index 000000000..0c1f99117 --- /dev/null +++ b/src/video_core/texture_cache/image_view_info.h | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <type_traits> | ||
| 9 | |||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/texture_cache/types.h" | ||
| 12 | #include "video_core/textures/texture.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | using Tegra::Texture::SwizzleSource; | ||
| 17 | using Tegra::Texture::TICEntry; | ||
| 18 | using VideoCore::Surface::PixelFormat; | ||
| 19 | |||
| 20 | /// Properties used to determine an image view | ||
| 21 | struct ImageViewInfo { | ||
| 22 | explicit ImageViewInfo() noexcept = default; | ||
| 23 | explicit ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept; | ||
| 24 | explicit ImageViewInfo(ImageViewType type, PixelFormat format, | ||
| 25 | SubresourceRange range = {}) noexcept; | ||
| 26 | |||
| 27 | auto operator<=>(const ImageViewInfo&) const noexcept = default; | ||
| 28 | |||
| 29 | [[nodiscard]] bool IsRenderTarget() const noexcept; | ||
| 30 | |||
| 31 | [[nodiscard]] std::array<SwizzleSource, 4> Swizzle() const noexcept { | ||
| 32 | return std::array{ | ||
| 33 | static_cast<SwizzleSource>(x_source), | ||
| 34 | static_cast<SwizzleSource>(y_source), | ||
| 35 | static_cast<SwizzleSource>(z_source), | ||
| 36 | static_cast<SwizzleSource>(w_source), | ||
| 37 | }; | ||
| 38 | } | ||
| 39 | |||
| 40 | ImageViewType type{}; | ||
| 41 | PixelFormat format{}; | ||
| 42 | SubresourceRange range; | ||
| 43 | u8 x_source = static_cast<u8>(SwizzleSource::R); | ||
| 44 | u8 y_source = static_cast<u8>(SwizzleSource::G); | ||
| 45 | u8 z_source = static_cast<u8>(SwizzleSource::B); | ||
| 46 | u8 w_source = static_cast<u8>(SwizzleSource::A); | ||
| 47 | }; | ||
| 48 | static_assert(std::has_unique_object_representations_v<ImageViewInfo>); | ||
| 49 | |||
| 50 | } // namespace VideoCommon | ||
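The static_assert guarantees ImageViewInfo has no padding bits, which is what lets the cache treat it as a raw byte key: bytewise hashing and comparison agree with the defaulted operator<=>. A minimal sketch of a bytewise hash under that guarantee (the FNV-1a helper is illustrative, not part of this change):

    #include <cstddef>
    #include "video_core/texture_cache/image_view_info.h"

    // Illustrative only: FNV-1a over a padding-free, trivially-copyable key.
    size_t HashBytes(const VideoCommon::ImageViewInfo& info) {
        const auto* bytes = reinterpret_cast<const unsigned char*>(&info);
        size_t hash = 0xcbf29ce484222325ULL; // FNV offset basis
        for (size_t i = 0; i < sizeof(info); ++i) {
            hash = (hash ^ bytes[i]) * 0x100000001b3ULL; // FNV prime
        }
        return hash;
    }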
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h new file mode 100644 index 000000000..9b9544b07 --- /dev/null +++ b/src/video_core/texture_cache/render_targets.h | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <span> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 11 | #include "common/bit_cast.h" | ||
| 12 | #include "video_core/texture_cache/types.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | /// Framebuffer properties used to lookup a framebuffer | ||
| 17 | struct RenderTargets { | ||
| 18 | constexpr auto operator<=>(const RenderTargets&) const noexcept = default; | ||
| 19 | |||
| 20 | constexpr bool Contains(std::span<const ImageViewId> elements) const noexcept { | ||
| 21 | const auto contains = [elements](ImageViewId item) { | ||
| 22 | return std::ranges::find(elements, item) != elements.end(); | ||
| 23 | }; | ||
| 24 | return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id); | ||
| 25 | } | ||
| 26 | |||
| 27 | std::array<ImageViewId, NUM_RT> color_buffer_ids; | ||
| 28 | ImageViewId depth_buffer_id; | ||
| 29 | std::array<u8, NUM_RT> draw_buffers{}; | ||
| 30 | Extent2D size; | ||
| 31 | }; | ||
| 32 | |||
| 33 | } // namespace VideoCommon | ||
| 34 | |||
| 35 | namespace std { | ||
| 36 | |||
| 37 | template <> | ||
| 38 | struct hash<VideoCommon::RenderTargets> { | ||
| 39 | size_t operator()(const VideoCommon::RenderTargets& rt) const noexcept { | ||
| 40 | using VideoCommon::ImageViewId; | ||
| 41 | size_t value = std::hash<ImageViewId>{}(rt.depth_buffer_id); | ||
| 42 | for (const ImageViewId color_buffer_id : rt.color_buffer_ids) { | ||
| 43 | value ^= std::hash<ImageViewId>{}(color_buffer_id); | ||
| 44 | } | ||
| 45 | value ^= Common::BitCast<u64>(rt.draw_buffers); | ||
| 46 | value ^= Common::BitCast<u64>(rt.size); | ||
| 47 | return value; | ||
| 48 | } | ||
| 49 | }; | ||
| 50 | |||
| 51 | } // namespace std | ||
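Two details are worth noting in the hash above: Common::BitCast<u64> only compiles because std::array<u8, NUM_RT> and Extent2D are each exactly eight bytes (implying NUM_RT == 8), and the defaulted operator<=> supplies the operator== that unordered containers need. Together they make RenderTargets directly usable as a map key, e.g.:

    #include <unordered_map>
    #include "video_core/texture_cache/render_targets.h"

    // 'int' stands in for whatever framebuffer handle the backend cache stores.
    std::unordered_map<VideoCommon::RenderTargets, int> framebuffers;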
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h new file mode 100644 index 000000000..04539a43c --- /dev/null +++ b/src/video_core/texture_cache/samples_helper.h | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <utility> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "video_core/textures/texture.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | [[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) { | ||
| 15 | switch (num_samples) { | ||
| 16 | case 1: | ||
| 17 | return {0, 0}; | ||
| 18 | case 2: | ||
| 19 | return {1, 0}; | ||
| 20 | case 4: | ||
| 21 | return {1, 1}; | ||
| 22 | case 8: | ||
| 23 | return {2, 1}; | ||
| 24 | case 16: | ||
| 25 | return {2, 2}; | ||
| 26 | } | ||
| 27 | UNREACHABLE_MSG("Invalid number of samples={}", num_samples); | ||
| 28 | return {1, 1}; | ||
| 29 | } | ||
| 30 | |||
| 31 | [[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) { | ||
| 32 | using Tegra::Texture::MsaaMode; | ||
| 33 | switch (msaa_mode) { | ||
| 34 | case MsaaMode::Msaa1x1: | ||
| 35 | return 1; | ||
| 36 | case MsaaMode::Msaa2x1: | ||
| 37 | case MsaaMode::Msaa2x1_D3D: | ||
| 38 | return 2; | ||
| 39 | case MsaaMode::Msaa2x2: | ||
| 40 | case MsaaMode::Msaa2x2_VC4: | ||
| 41 | case MsaaMode::Msaa2x2_VC12: | ||
| 42 | return 4; | ||
| 43 | case MsaaMode::Msaa4x2: | ||
| 44 | case MsaaMode::Msaa4x2_D3D: | ||
| 45 | case MsaaMode::Msaa4x2_VC8: | ||
| 46 | case MsaaMode::Msaa4x2_VC24: | ||
| 47 | return 8; | ||
| 48 | case MsaaMode::Msaa4x4: | ||
| 49 | return 16; | ||
| 50 | } | ||
| 51 | UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast<int>(msaa_mode)); | ||
| 52 | return 1; | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace VideoCommon | ||
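SamplesLog2 returns the per-axis log2 sample factors, so the product of the two axis extents always recovers the sample count. A quick sanity check (illustrative, not part of the change):

    #include "video_core/texture_cache/samples_helper.h"

    int DemoSampleCount() {
        // Msaa4x2 reports 8 samples; SamplesLog2 splits that into per-axis log2s.
        const auto [x_log2, y_log2] = VideoCommon::SamplesLog2(8); // {2, 1}
        return (1 << x_log2) * (1 << y_log2);                      // == 8
    }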
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h new file mode 100644 index 000000000..eae3be6ea --- /dev/null +++ b/src/video_core/texture_cache/slot_vector.h | |||
| @@ -0,0 +1,156 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <concepts> | ||
| 9 | #include <numeric> | ||
| 10 | #include <type_traits> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include "common/assert.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | struct SlotId { | ||
| 20 | static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max(); | ||
| 21 | |||
| 22 | constexpr auto operator<=>(const SlotId&) const noexcept = default; | ||
| 23 | |||
| 24 | constexpr explicit operator bool() const noexcept { | ||
| 25 | return index != INVALID_INDEX; | ||
| 26 | } | ||
| 27 | |||
| 28 | u32 index = INVALID_INDEX; | ||
| 29 | }; | ||
| 30 | |||
| 31 | template <class T> | ||
| 32 | requires std::is_nothrow_move_assignable_v<T>&& | ||
| 33 | std::is_nothrow_move_constructible_v<T> class SlotVector { | ||
| 34 | public: | ||
| 35 | ~SlotVector() noexcept { | ||
| 36 | size_t index = 0; | ||
| 37 | for (u64 bits : stored_bitset) { | ||
| 38 | for (size_t bit = 0; bits; ++bit, bits >>= 1) { | ||
| 39 | if ((bits & 1) != 0) { | ||
| 40 | values[index + bit].object.~T(); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | index += 64; | ||
| 44 | } | ||
| 45 | delete[] values; | ||
| 46 | } | ||
| 47 | |||
| 48 | [[nodiscard]] T& operator[](SlotId id) noexcept { | ||
| 49 | ValidateIndex(id); | ||
| 50 | return values[id.index].object; | ||
| 51 | } | ||
| 52 | |||
| 53 | [[nodiscard]] const T& operator[](SlotId id) const noexcept { | ||
| 54 | ValidateIndex(id); | ||
| 55 | return values[id.index].object; | ||
| 56 | } | ||
| 57 | |||
| 58 | template <typename... Args> | ||
| 59 | [[nodiscard]] SlotId insert(Args&&... args) noexcept { | ||
| 60 | const u32 index = FreeValueIndex(); | ||
| 61 | new (&values[index].object) T(std::forward<Args>(args)...); | ||
| 62 | SetStorageBit(index); | ||
| 63 | |||
| 64 | return SlotId{index}; | ||
| 65 | } | ||
| 66 | |||
| 67 | void erase(SlotId id) noexcept { | ||
| 68 | values[id.index].object.~T(); | ||
| 69 | free_list.push_back(id.index); | ||
| 70 | ResetStorageBit(id.index); | ||
| 71 | } | ||
| 72 | |||
| 73 | private: | ||
| 74 | struct NonTrivialDummy { | ||
| 75 | NonTrivialDummy() noexcept {} | ||
| 76 | }; | ||
| 77 | |||
| 78 | union Entry { | ||
| 79 | Entry() noexcept : dummy{} {} | ||
| 80 | ~Entry() noexcept {} | ||
| 81 | |||
| 82 | NonTrivialDummy dummy; | ||
| 83 | T object; | ||
| 84 | }; | ||
| 85 | |||
| 86 | void SetStorageBit(u32 index) noexcept { | ||
| 87 | stored_bitset[index / 64] |= u64(1) << (index % 64); | ||
| 88 | } | ||
| 89 | |||
| 90 | void ResetStorageBit(u32 index) noexcept { | ||
| 91 | stored_bitset[index / 64] &= ~(u64(1) << (index % 64)); | ||
| 92 | } | ||
| 93 | |||
| 94 | bool ReadStorageBit(u32 index) noexcept { | ||
| 95 | return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0; | ||
| 96 | } | ||
| 97 | |||
| 98 | void ValidateIndex(SlotId id) const noexcept { | ||
| 99 | DEBUG_ASSERT(id); | ||
| 100 | DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); | ||
| 101 | DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); | ||
| 102 | } | ||
| 103 | |||
| 104 | [[nodiscard]] u32 FreeValueIndex() noexcept { | ||
| 105 | if (free_list.empty()) { | ||
| 106 | Reserve(values_capacity ? (values_capacity << 1) : 1); | ||
| 107 | } | ||
| 108 | const u32 free_index = free_list.back(); | ||
| 109 | free_list.pop_back(); | ||
| 110 | return free_index; | ||
| 111 | } | ||
| 112 | |||
| 113 | void Reserve(size_t new_capacity) noexcept { | ||
| 114 | Entry* const new_values = new Entry[new_capacity]; | ||
| 115 | size_t index = 0; | ||
| 116 | for (u64 bits : stored_bitset) { | ||
| 117 | for (size_t bit = 0; bits; ++bit, bits >>= 1) { | ||
| 118 | const size_t i = index + bit; | ||
| 119 | if ((bits & 1) == 0) { | ||
| 120 | continue; | ||
| 121 | } | ||
| 122 | T& old_value = values[i].object; | ||
| 123 | new (&new_values[i].object) T(std::move(old_value)); | ||
| 124 | old_value.~T(); | ||
| 125 | } | ||
| 126 | index += 64; | ||
| 127 | } | ||
| 128 | |||
| 129 | stored_bitset.resize((new_capacity + 63) / 64); | ||
| 130 | |||
| 131 | const size_t old_free_size = free_list.size(); | ||
| 132 | free_list.resize(old_free_size + (new_capacity - values_capacity)); | ||
| 133 | std::iota(free_list.begin() + old_free_size, free_list.end(), | ||
| 134 | static_cast<u32>(values_capacity)); | ||
| 135 | |||
| 136 | delete[] values; | ||
| 137 | values = new_values; | ||
| 138 | values_capacity = new_capacity; | ||
| 139 | } | ||
| 140 | |||
| 141 | Entry* values = nullptr; | ||
| 142 | size_t values_capacity = 0; | ||
| 143 | size_t values_size = 0; | ||
| 144 | |||
| 145 | std::vector<u64> stored_bitset; | ||
| 146 | std::vector<u32> free_list; | ||
| 147 | }; | ||
| 148 | |||
| 149 | } // namespace VideoCommon | ||
| 150 | |||
| 151 | template <> | ||
| 152 | struct std::hash<VideoCommon::SlotId> { | ||
| 153 | size_t operator()(const VideoCommon::SlotId& id) const noexcept { | ||
| 154 | return std::hash<u32>{}(id.index); | ||
| 155 | } | ||
| 156 | }; | ||
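SlotVector hands out stable integer ids instead of pointers or iterators: entries live in manually managed union storage, a bitset tracks which slots hold live objects, and erased indices are recycled through the free list, so an id stays valid until its slot is erased even when the backing array reallocates. A minimal usage sketch (std::string satisfies the nothrow-move requirements):

    #include <string>
    #include "video_core/texture_cache/slot_vector.h"

    void DemoSlotVector() {
        VideoCommon::SlotVector<std::string> names;
        const VideoCommon::SlotId id = names.insert("color buffer");
        names[id] += " 0"; // ids stay valid even when the storage reallocates
        names.erase(id);   // the index is recycled through the free list
    }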
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp deleted file mode 100644 index efbcf6723..000000000 --- a/src/video_core/texture_cache/surface_base.cpp +++ /dev/null | |||
| @@ -1,299 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/algorithm.h" | ||
| 6 | #include "common/assert.h" | ||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "common/microprofile.h" | ||
| 9 | #include "video_core/memory_manager.h" | ||
| 10 | #include "video_core/texture_cache/surface_base.h" | ||
| 11 | #include "video_core/texture_cache/surface_params.h" | ||
| 12 | #include "video_core/textures/convert.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128)); | ||
| 17 | MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128)); | ||
| 18 | |||
| 19 | using Tegra::Texture::ConvertFromGuestToHost; | ||
| 20 | using VideoCore::MortonSwizzleMode; | ||
| 21 | using VideoCore::Surface::IsPixelFormatASTC; | ||
| 22 | using VideoCore::Surface::PixelFormat; | ||
| 23 | |||
| 24 | StagingCache::StagingCache() = default; | ||
| 25 | |||
| 26 | StagingCache::~StagingCache() = default; | ||
| 27 | |||
| 28 | SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_, | ||
| 29 | bool is_astc_supported_) | ||
| 30 | : params{params_}, gpu_addr{gpu_addr_}, mipmap_sizes(params_.num_levels), | ||
| 31 | mipmap_offsets(params.num_levels) { | ||
| 32 | is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported_; | ||
| 33 | host_memory_size = params.GetHostSizeInBytes(is_converted); | ||
| 34 | |||
| 35 | std::size_t offset = 0; | ||
| 36 | for (u32 level = 0; level < params.num_levels; ++level) { | ||
| 37 | const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; | ||
| 38 | mipmap_sizes[level] = mipmap_size; | ||
| 39 | mipmap_offsets[level] = offset; | ||
| 40 | offset += mipmap_size; | ||
| 41 | } | ||
| 42 | layer_size = offset; | ||
| 43 | if (params.is_layered) { | ||
| 44 | if (params.is_tiled) { | ||
| 45 | layer_size = | ||
| 46 | SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth); | ||
| 47 | } | ||
| 48 | guest_memory_size = layer_size * params.depth; | ||
| 49 | } else { | ||
| 50 | guest_memory_size = layer_size; | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const { | ||
| 55 | const u32 src_bpp{params.GetBytesPerPixel()}; | ||
| 56 | const u32 dst_bpp{rhs.GetBytesPerPixel()}; | ||
| 57 | const bool ib1 = params.IsBuffer(); | ||
| 58 | const bool ib2 = rhs.IsBuffer(); | ||
| 59 | if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { | ||
| 60 | const bool cb1 = params.IsCompressed(); | ||
| 61 | const bool cb2 = rhs.IsCompressed(); | ||
| 62 | if (cb1 == cb2) { | ||
| 63 | return MatchTopologyResult::FullMatch; | ||
| 64 | } | ||
| 65 | return MatchTopologyResult::CompressUnmatch; | ||
| 66 | } | ||
| 67 | return MatchTopologyResult::None; | ||
| 68 | } | ||
| 69 | |||
| 70 | MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const { | ||
| 71 | // Buffer surface check | ||
| 72 | if (params.IsBuffer()) { | ||
| 73 | const std::size_t wd1 = params.width * params.GetBytesPerPixel(); | ||
| 74 | const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); | ||
| 75 | if (wd1 == wd2) { | ||
| 76 | return MatchStructureResult::FullMatch; | ||
| 77 | } | ||
| 78 | return MatchStructureResult::None; | ||
| 79 | } | ||
| 80 | |||
| 81 | // Linear Surface check | ||
| 82 | if (!params.is_tiled) { | ||
| 83 | if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) { | ||
| 84 | if (params.width == rhs.width) { | ||
| 85 | return MatchStructureResult::FullMatch; | ||
| 86 | } else { | ||
| 87 | return MatchStructureResult::SemiMatch; | ||
| 88 | } | ||
| 89 | } | ||
| 90 | return MatchStructureResult::None; | ||
| 91 | } | ||
| 92 | |||
| 93 | // Tiled Surface check | ||
| 94 | if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, | ||
| 95 | params.tile_width_spacing, params.num_levels) == | ||
| 96 | std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, | ||
| 97 | rhs.tile_width_spacing, rhs.num_levels)) { | ||
| 98 | if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { | ||
| 99 | return MatchStructureResult::FullMatch; | ||
| 100 | } | ||
| 101 | const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format, | ||
| 102 | rhs.pixel_format); | ||
| 103 | const u32 hs = | ||
| 104 | SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); | ||
| 105 | const u32 w1 = params.GetBlockAlignedWidth(); | ||
| 106 | if (std::tie(w1, params.height) == std::tie(ws, hs)) { | ||
| 107 | return MatchStructureResult::SemiMatch; | ||
| 108 | } | ||
| 109 | } | ||
| 110 | return MatchStructureResult::None; | ||
| 111 | } | ||
| 112 | |||
| 113 | std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap( | ||
| 114 | const GPUVAddr candidate_gpu_addr) const { | ||
| 115 | if (gpu_addr == candidate_gpu_addr) { | ||
| 116 | return {{0, 0}}; | ||
| 117 | } | ||
| 118 | |||
| 119 | if (candidate_gpu_addr < gpu_addr) { | ||
| 120 | return std::nullopt; | ||
| 121 | } | ||
| 122 | |||
| 123 | const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; | ||
| 124 | const auto layer{static_cast<u32>(relative_address / layer_size)}; | ||
| 125 | if (layer >= params.depth) { | ||
| 126 | return std::nullopt; | ||
| 127 | } | ||
| 128 | |||
| 129 | const GPUVAddr mipmap_address = relative_address - layer_size * layer; | ||
| 130 | const auto mipmap_it = | ||
| 131 | Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); | ||
| 132 | if (mipmap_it == mipmap_offsets.end()) { | ||
| 133 | return std::nullopt; | ||
| 134 | } | ||
| 135 | |||
| 136 | const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))}; | ||
| 137 | return std::make_pair(layer, level); | ||
| 138 | } | ||
| 139 | |||
| 140 | std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const { | ||
| 141 | const u32 layers{params.depth}; | ||
| 142 | const u32 mipmaps{params.num_levels}; | ||
| 143 | std::vector<CopyParams> result; | ||
| 144 | result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps)); | ||
| 145 | |||
| 146 | for (u32 layer = 0; layer < layers; layer++) { | ||
| 147 | for (u32 level = 0; level < mipmaps; level++) { | ||
| 148 | const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); | ||
| 149 | const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); | ||
| 150 | result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1); | ||
| 151 | } | ||
| 152 | } | ||
| 153 | return result; | ||
| 154 | } | ||
| 155 | |||
| 156 | std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const { | ||
| 157 | const u32 mipmaps{params.num_levels}; | ||
| 158 | std::vector<CopyParams> result; | ||
| 159 | result.reserve(mipmaps); | ||
| 160 | |||
| 161 | for (u32 level = 0; level < mipmaps; level++) { | ||
| 162 | const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); | ||
| 163 | const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); | ||
| 164 | const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; | ||
| 165 | result.emplace_back(width, height, depth, level); | ||
| 166 | } | ||
| 167 | return result; | ||
| 168 | } | ||
| 169 | |||
| 170 | void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, | ||
| 171 | const SurfaceParams& surface_params, u8* buffer, u32 level) { | ||
| 172 | const u32 width{surface_params.GetMipWidth(level)}; | ||
| 173 | const u32 height{surface_params.GetMipHeight(level)}; | ||
| 174 | const u32 block_height{surface_params.GetMipBlockHeight(level)}; | ||
| 175 | const u32 block_depth{surface_params.GetMipBlockDepth(level)}; | ||
| 176 | |||
| 177 | std::size_t guest_offset{mipmap_offsets[level]}; | ||
| 178 | if (surface_params.is_layered) { | ||
| 179 | std::size_t host_offset = 0; | ||
| 180 | const std::size_t guest_stride = layer_size; | ||
| 181 | const std::size_t host_stride = surface_params.GetHostLayerSize(level); | ||
| 182 | for (u32 layer = 0; layer < surface_params.depth; ++layer) { | ||
| 183 | MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, | ||
| 184 | block_depth, 1, surface_params.tile_width_spacing, buffer + host_offset, | ||
| 185 | memory + guest_offset); | ||
| 186 | guest_offset += guest_stride; | ||
| 187 | host_offset += host_stride; | ||
| 188 | } | ||
| 189 | } else { | ||
| 190 | MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, block_depth, | ||
| 191 | surface_params.GetMipDepth(level), surface_params.tile_width_spacing, buffer, | ||
| 192 | memory + guest_offset); | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 | void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, | ||
| 197 | StagingCache& staging_cache) { | ||
| 198 | MICROPROFILE_SCOPE(GPU_Load_Texture); | ||
| 199 | auto& staging_buffer = staging_cache.GetBuffer(0); | ||
| 200 | u8* host_ptr; | ||
| 201 | // Use an extra temporary buffer | ||
| 202 | auto& tmp_buffer = staging_cache.GetBuffer(1); | ||
| 203 | tmp_buffer.resize(guest_memory_size); | ||
| 204 | host_ptr = tmp_buffer.data(); | ||
| 205 | memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||
| 206 | |||
| 207 | if (params.is_tiled) { | ||
| 208 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", | ||
| 209 | params.block_width, static_cast<u32>(params.target)); | ||
| 210 | for (u32 level = 0; level < params.num_levels; ++level) { | ||
| 211 | const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; | ||
| 212 | SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, | ||
| 213 | staging_buffer.data() + host_offset, level); | ||
| 214 | } | ||
| 215 | } else { | ||
| 216 | ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented"); | ||
| 217 | const u32 bpp{params.GetBytesPerPixel()}; | ||
| 218 | const u32 block_width{params.GetDefaultBlockWidth()}; | ||
| 219 | const u32 block_height{params.GetDefaultBlockHeight()}; | ||
| 220 | const u32 width{(params.width + block_width - 1) / block_width}; | ||
| 221 | const u32 height{(params.height + block_height - 1) / block_height}; | ||
| 222 | const u32 copy_size{width * bpp}; | ||
| 223 | if (params.pitch == copy_size) { | ||
| 224 | std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false)); | ||
| 225 | } else { | ||
| 226 | const u8* start{host_ptr}; | ||
| 227 | u8* write_to{staging_buffer.data()}; | ||
| 228 | for (u32 h = height; h > 0; --h) { | ||
| 229 | std::memcpy(write_to, start, copy_size); | ||
| 230 | start += params.pitch; | ||
| 231 | write_to += copy_size; | ||
| 232 | } | ||
| 233 | } | ||
| 234 | } | ||
| 235 | |||
| 236 | if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) { | ||
| 237 | return; | ||
| 238 | } | ||
| 239 | |||
| 240 | for (u32 level = params.num_levels; level--;) { | ||
| 241 | const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)}; | ||
| 242 | const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)}; | ||
| 243 | u8* const in_buffer = staging_buffer.data() + in_host_offset; | ||
| 244 | u8* const out_buffer = staging_buffer.data() + out_host_offset; | ||
| 245 | ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, | ||
| 246 | params.GetMipWidth(level), params.GetMipHeight(level), | ||
| 247 | params.GetMipDepth(level), true, true); | ||
| 248 | } | ||
| 249 | } | ||
| 250 | |||
| 251 | void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, | ||
| 252 | StagingCache& staging_cache) { | ||
| 253 | MICROPROFILE_SCOPE(GPU_Flush_Texture); | ||
| 254 | auto& staging_buffer = staging_cache.GetBuffer(0); | ||
| 255 | u8* host_ptr; | ||
| 256 | |||
| 257 | // Use an extra temporary buffer | ||
| 258 | auto& tmp_buffer = staging_cache.GetBuffer(1); | ||
| 259 | tmp_buffer.resize(guest_memory_size); | ||
| 260 | host_ptr = tmp_buffer.data(); | ||
| 261 | |||
| 262 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 263 | // Special case for 3D texture segments | ||
| 264 | memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||
| 265 | } | ||
| 266 | |||
| 267 | if (params.is_tiled) { | ||
| 268 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); | ||
| 269 | for (u32 level = 0; level < params.num_levels; ++level) { | ||
| 270 | const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; | ||
| 271 | SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, | ||
| 272 | staging_buffer.data() + host_offset, level); | ||
| 273 | } | ||
| 274 | } else if (params.IsBuffer()) { | ||
| 275 | // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest | ||
| 276 | // memory. | ||
| 277 | std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); | ||
| 278 | } else { | ||
| 279 | ASSERT(params.target == SurfaceTarget::Texture2D); | ||
| 280 | ASSERT(params.num_levels == 1); | ||
| 281 | |||
| 282 | const u32 bpp{params.GetBytesPerPixel()}; | ||
| 283 | const u32 copy_size{params.width * bpp}; | ||
| 284 | if (params.pitch == copy_size) { | ||
| 285 | std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); | ||
| 286 | } else { | ||
| 287 | u8* start{host_ptr}; | ||
| 288 | const u8* read_to{staging_buffer.data()}; | ||
| 289 | for (u32 h = params.height; h > 0; --h) { | ||
| 290 | std::memcpy(start, read_to, copy_size); | ||
| 291 | start += params.pitch; | ||
| 292 | read_to += copy_size; | ||
| 293 | } | ||
| 294 | } | ||
| 295 | } | ||
| 296 | memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||
| 297 | } | ||
| 298 | |||
| 299 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h deleted file mode 100644 index b57135fe4..000000000 --- a/src/video_core/texture_cache/surface_base.h +++ /dev/null | |||
| @@ -1,333 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <tuple> | ||
| 9 | #include <unordered_map> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/gpu.h" | ||
| 14 | #include "video_core/morton.h" | ||
| 15 | #include "video_core/texture_cache/copy_params.h" | ||
| 16 | #include "video_core/texture_cache/surface_params.h" | ||
| 17 | #include "video_core/texture_cache/surface_view.h" | ||
| 18 | |||
| 19 | namespace Tegra { | ||
| 20 | class MemoryManager; | ||
| 21 | } | ||
| 22 | |||
| 23 | namespace VideoCommon { | ||
| 24 | |||
| 25 | using VideoCore::MortonSwizzleMode; | ||
| 26 | using VideoCore::Surface::SurfaceTarget; | ||
| 27 | |||
| 28 | enum class MatchStructureResult : u32 { | ||
| 29 | FullMatch = 0, | ||
| 30 | SemiMatch = 1, | ||
| 31 | None = 2, | ||
| 32 | }; | ||
| 33 | |||
| 34 | enum class MatchTopologyResult : u32 { | ||
| 35 | FullMatch = 0, | ||
| 36 | CompressUnmatch = 1, | ||
| 37 | None = 2, | ||
| 38 | }; | ||
| 39 | |||
| 40 | class StagingCache { | ||
| 41 | public: | ||
| 42 | explicit StagingCache(); | ||
| 43 | ~StagingCache(); | ||
| 44 | |||
| 45 | std::vector<u8>& GetBuffer(std::size_t index) { | ||
| 46 | return staging_buffer[index]; | ||
| 47 | } | ||
| 48 | |||
| 49 | const std::vector<u8>& GetBuffer(std::size_t index) const { | ||
| 50 | return staging_buffer[index]; | ||
| 51 | } | ||
| 52 | |||
| 53 | void SetSize(std::size_t size) { | ||
| 54 | staging_buffer.resize(size); | ||
| 55 | } | ||
| 56 | |||
| 57 | private: | ||
| 58 | std::vector<std::vector<u8>> staging_buffer; | ||
| 59 | }; | ||
| 60 | |||
| 61 | class SurfaceBaseImpl { | ||
| 62 | public: | ||
| 63 | void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); | ||
| 64 | |||
| 65 | void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); | ||
| 66 | |||
| 67 | GPUVAddr GetGpuAddr() const { | ||
| 68 | return gpu_addr; | ||
| 69 | } | ||
| 70 | |||
| 71 | bool Overlaps(const VAddr start, const VAddr end) const { | ||
| 72 | return (cpu_addr < end) && (cpu_addr_end > start); | ||
| 73 | } | ||
| 74 | |||
| 75 | bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const { | ||
| 76 | const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; | ||
| 77 | return gpu_addr <= other_start && other_end <= gpu_addr_end; | ||
| 78 | } | ||
| 79 | |||
| 80 | // Use only when recycling a surface | ||
| 81 | void SetGpuAddr(const GPUVAddr new_addr) { | ||
| 82 | gpu_addr = new_addr; | ||
| 83 | } | ||
| 84 | |||
| 85 | VAddr GetCpuAddr() const { | ||
| 86 | return cpu_addr; | ||
| 87 | } | ||
| 88 | |||
| 89 | VAddr GetCpuAddrEnd() const { | ||
| 90 | return cpu_addr_end; | ||
| 91 | } | ||
| 92 | |||
| 93 | void SetCpuAddr(const VAddr new_addr) { | ||
| 94 | cpu_addr = new_addr; | ||
| 95 | cpu_addr_end = new_addr + guest_memory_size; | ||
| 96 | } | ||
| 97 | |||
| 98 | const SurfaceParams& GetSurfaceParams() const { | ||
| 99 | return params; | ||
| 100 | } | ||
| 101 | |||
| 102 | std::size_t GetSizeInBytes() const { | ||
| 103 | return guest_memory_size; | ||
| 104 | } | ||
| 105 | |||
| 106 | std::size_t GetHostSizeInBytes() const { | ||
| 107 | return host_memory_size; | ||
| 108 | } | ||
| 109 | |||
| 110 | std::size_t GetMipmapSize(const u32 level) const { | ||
| 111 | return mipmap_sizes[level]; | ||
| 112 | } | ||
| 113 | |||
| 114 | bool IsLinear() const { | ||
| 115 | return !params.is_tiled; | ||
| 116 | } | ||
| 117 | |||
| 118 | bool IsConverted() const { | ||
| 119 | return is_converted; | ||
| 120 | } | ||
| 121 | |||
| 122 | bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { | ||
| 123 | return params.pixel_format == pixel_format; | ||
| 124 | } | ||
| 125 | |||
| 126 | VideoCore::Surface::PixelFormat GetFormat() const { | ||
| 127 | return params.pixel_format; | ||
| 128 | } | ||
| 129 | |||
| 130 | bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { | ||
| 131 | return params.target == target; | ||
| 132 | } | ||
| 133 | |||
| 134 | MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const; | ||
| 135 | |||
| 136 | MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const; | ||
| 137 | |||
| 138 | bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { | ||
| 139 | return std::tie(gpu_addr, params.target, params.num_levels) == | ||
| 140 | std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && | ||
| 141 | params.target == SurfaceTarget::Texture2D && params.num_levels == 1; | ||
| 142 | } | ||
| 143 | |||
| 144 | std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const; | ||
| 145 | |||
| 146 | std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const { | ||
| 147 | return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params); | ||
| 148 | } | ||
| 149 | |||
| 150 | protected: | ||
| 151 | explicit SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_, | ||
| 152 | bool is_astc_supported_); | ||
| 153 | ~SurfaceBaseImpl() = default; | ||
| 154 | |||
| 155 | virtual void DecorateSurfaceName() = 0; | ||
| 156 | |||
| 157 | const SurfaceParams params; | ||
| 158 | std::size_t layer_size; | ||
| 159 | std::size_t guest_memory_size; | ||
| 160 | std::size_t host_memory_size; | ||
| 161 | GPUVAddr gpu_addr{}; | ||
| 162 | VAddr cpu_addr{}; | ||
| 163 | VAddr cpu_addr_end{}; | ||
| 164 | bool is_converted{}; | ||
| 165 | |||
| 166 | std::vector<std::size_t> mipmap_sizes; | ||
| 167 | std::vector<std::size_t> mipmap_offsets; | ||
| 168 | |||
| 169 | private: | ||
| 170 | void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& surface_params, | ||
| 171 | u8* buffer, u32 level); | ||
| 172 | |||
| 173 | std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const; | ||
| 174 | |||
| 175 | std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const; | ||
| 176 | }; | ||
| 177 | |||
| 178 | template <typename TView> | ||
| 179 | class SurfaceBase : public SurfaceBaseImpl { | ||
| 180 | public: | ||
| 181 | virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0; | ||
| 182 | |||
| 183 | virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0; | ||
| 184 | |||
| 185 | void MarkAsModified(bool is_modified_, u64 tick) { | ||
| 186 | is_modified = is_modified_ || is_target; | ||
| 187 | modification_tick = tick; | ||
| 188 | } | ||
| 189 | |||
| 190 | void MarkAsRenderTarget(bool is_target_, u32 index_) { | ||
| 191 | is_target = is_target_; | ||
| 192 | index = index_; | ||
| 193 | } | ||
| 194 | |||
| 195 | void SetMemoryMarked(bool is_memory_marked_) { | ||
| 196 | is_memory_marked = is_memory_marked_; | ||
| 197 | } | ||
| 198 | |||
| 199 | bool IsMemoryMarked() const { | ||
| 200 | return is_memory_marked; | ||
| 201 | } | ||
| 202 | |||
| 203 | void SetSyncPending(bool is_sync_pending_) { | ||
| 204 | is_sync_pending = is_sync_pending_; | ||
| 205 | } | ||
| 206 | |||
| 207 | bool IsSyncPending() const { | ||
| 208 | return is_sync_pending; | ||
| 209 | } | ||
| 210 | |||
| 211 | void MarkAsPicked(bool is_picked_) { | ||
| 212 | is_picked = is_picked_; | ||
| 213 | } | ||
| 214 | |||
| 215 | bool IsModified() const { | ||
| 216 | return is_modified; | ||
| 217 | } | ||
| 218 | |||
| 219 | bool IsProtected() const { | ||
| 220 | // Only 3D slices are to be protected | ||
| 221 | return is_target && params.target == SurfaceTarget::Texture3D; | ||
| 222 | } | ||
| 223 | |||
| 224 | bool IsRenderTarget() const { | ||
| 225 | return is_target; | ||
| 226 | } | ||
| 227 | |||
| 228 | u32 GetRenderTarget() const { | ||
| 229 | return index; | ||
| 230 | } | ||
| 231 | |||
| 232 | bool IsRegistered() const { | ||
| 233 | return is_registered; | ||
| 234 | } | ||
| 235 | |||
| 236 | bool IsPicked() const { | ||
| 237 | return is_picked; | ||
| 238 | } | ||
| 239 | |||
| 240 | void MarkAsRegistered(bool is_reg) { | ||
| 241 | is_registered = is_reg; | ||
| 242 | } | ||
| 243 | |||
| 244 | u64 GetModificationTick() const { | ||
| 245 | return modification_tick; | ||
| 246 | } | ||
| 247 | |||
| 248 | TView EmplaceOverview(const SurfaceParams& overview_params) { | ||
| 249 | const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth}; | ||
| 250 | return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); | ||
| 251 | } | ||
| 252 | |||
| 253 | TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) { | ||
| 254 | return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth, | ||
| 255 | base_level, num_levels)); | ||
| 256 | } | ||
| 257 | |||
| 258 | std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params, | ||
| 259 | const GPUVAddr view_addr, | ||
| 260 | const std::size_t candidate_size, const u32 mipmap, | ||
| 261 | const u32 layer) { | ||
| 262 | const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)}; | ||
| 263 | if (!layer_mipmap) { | ||
| 264 | return {}; | ||
| 265 | } | ||
| 266 | const auto [end_layer, end_mipmap] = *layer_mipmap; | ||
| 267 | if (layer != end_layer) { | ||
| 268 | if (mipmap == 0 && end_mipmap == 0) { | ||
| 269 | return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1)); | ||
| 270 | } | ||
| 271 | return {}; | ||
| 272 | } else { | ||
| 273 | return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap)); | ||
| 274 | } | ||
| 275 | } | ||
| 276 | |||
| 277 | std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, | ||
| 278 | const std::size_t candidate_size) { | ||
| 279 | if (params.target == SurfaceTarget::Texture3D || | ||
| 280 | view_params.target == SurfaceTarget::Texture3D || | ||
| 281 | (params.num_levels == 1 && !params.is_layered)) { | ||
| 282 | return {}; | ||
| 283 | } | ||
| 284 | const auto layer_mipmap{GetLayerMipmap(view_addr)}; | ||
| 285 | if (!layer_mipmap) { | ||
| 286 | return {}; | ||
| 287 | } | ||
| 288 | const auto [layer, mipmap] = *layer_mipmap; | ||
| 289 | if (GetMipmapSize(mipmap) != candidate_size) { | ||
| 290 | return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer); | ||
| 291 | } | ||
| 292 | return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1)); | ||
| 293 | } | ||
| 294 | |||
| 295 | TView GetMainView() const { | ||
| 296 | return main_view; | ||
| 297 | } | ||
| 298 | |||
| 299 | protected: | ||
| 300 | explicit SurfaceBase(const GPUVAddr gpu_addr_, const SurfaceParams& params_, | ||
| 301 | bool is_astc_supported_) | ||
| 302 | : SurfaceBaseImpl{gpu_addr_, params_, is_astc_supported_} {} | ||
| 303 | |||
| 304 | ~SurfaceBase() = default; | ||
| 305 | |||
| 306 | virtual TView CreateView(const ViewParams& view_key) = 0; | ||
| 307 | |||
| 308 | TView main_view; | ||
| 309 | std::unordered_map<ViewParams, TView> views; | ||
| 310 | |||
| 311 | private: | ||
| 312 | TView GetView(const ViewParams& key) { | ||
| 313 | const auto [entry, is_cache_miss] = views.try_emplace(key); | ||
| 314 | auto& view{entry->second}; | ||
| 315 | if (is_cache_miss) { | ||
| 316 | view = CreateView(key); | ||
| 317 | } | ||
| 318 | return view; | ||
| 319 | } | ||
| 320 | |||
| 321 | static constexpr u32 NO_RT = 0xFFFFFFFF; | ||
| 322 | |||
| 323 | bool is_modified{}; | ||
| 324 | bool is_target{}; | ||
| 325 | bool is_registered{}; | ||
| 326 | bool is_picked{}; | ||
| 327 | bool is_memory_marked{}; | ||
| 328 | bool is_sync_pending{}; | ||
| 329 | u32 index{NO_RT}; | ||
| 330 | u64 modification_tick{}; | ||
| 331 | }; | ||
| 332 | |||
| 333 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp deleted file mode 100644 index 96f93246d..000000000 --- a/src/video_core/texture_cache/surface_params.cpp +++ /dev/null | |||
| @@ -1,445 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <string> | ||
| 7 | #include <tuple> | ||
| 8 | |||
| 9 | #include "common/alignment.h" | ||
| 10 | #include "common/bit_util.h" | ||
| 11 | #include "core/core.h" | ||
| 12 | #include "video_core/engines/shader_bytecode.h" | ||
| 13 | #include "video_core/surface.h" | ||
| 14 | #include "video_core/texture_cache/format_lookup_table.h" | ||
| 15 | #include "video_core/texture_cache/surface_params.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | using VideoCore::Surface::PixelFormat; | ||
| 20 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 21 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 22 | using VideoCore::Surface::SurfaceTarget; | ||
| 23 | using VideoCore::Surface::SurfaceTargetFromTextureType; | ||
| 24 | using VideoCore::Surface::SurfaceType; | ||
| 25 | |||
| 26 | namespace { | ||
| 27 | |||
| 28 | SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { | ||
| 29 | switch (type) { | ||
| 30 | case Tegra::Shader::TextureType::Texture1D: | ||
| 31 | return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D; | ||
| 32 | case Tegra::Shader::TextureType::Texture2D: | ||
| 33 | return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; | ||
| 34 | case Tegra::Shader::TextureType::Texture3D: | ||
| 35 | ASSERT(!is_array); | ||
| 36 | return SurfaceTarget::Texture3D; | ||
| 37 | case Tegra::Shader::TextureType::TextureCube: | ||
| 38 | return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap; | ||
| 39 | default: | ||
| 40 | UNREACHABLE(); | ||
| 41 | return SurfaceTarget::Texture2D; | ||
| 42 | } | ||
| 43 | } | ||
| 44 | |||
| 45 | SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) { | ||
| 46 | switch (type) { | ||
| 47 | case Tegra::Shader::ImageType::Texture1D: | ||
| 48 | return SurfaceTarget::Texture1D; | ||
| 49 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 50 | return SurfaceTarget::TextureBuffer; | ||
| 51 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 52 | return SurfaceTarget::Texture1DArray; | ||
| 53 | case Tegra::Shader::ImageType::Texture2D: | ||
| 54 | return SurfaceTarget::Texture2D; | ||
| 55 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 56 | return SurfaceTarget::Texture2DArray; | ||
| 57 | case Tegra::Shader::ImageType::Texture3D: | ||
| 58 | return SurfaceTarget::Texture3D; | ||
| 59 | default: | ||
| 60 | UNREACHABLE(); | ||
| 61 | return SurfaceTarget::Texture2D; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { | ||
| 66 | return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); | ||
| 67 | } | ||
| 68 | |||
| 69 | } // Anonymous namespace | ||
| 70 | |||
| 71 | SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table, | ||
| 72 | const Tegra::Texture::TICEntry& tic, | ||
| 73 | const VideoCommon::Shader::Sampler& entry) { | ||
| 74 | SurfaceParams params; | ||
| 75 | params.is_tiled = tic.IsTiled(); | ||
| 76 | params.srgb_conversion = tic.IsSrgbConversionEnabled(); | ||
| 77 | params.block_width = params.is_tiled ? tic.BlockWidth() : 0; | ||
| 78 | params.block_height = params.is_tiled ? tic.BlockHeight() : 0; | ||
| 79 | params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; | ||
| 80 | params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; | ||
| 81 | params.pixel_format = lookup_table.GetPixelFormat( | ||
| 82 | tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); | ||
| 83 | params.type = GetFormatType(params.pixel_format); | ||
| 84 | if (entry.is_shadow && params.type == SurfaceType::ColorTexture) { | ||
| 85 | switch (params.pixel_format) { | ||
| 86 | case PixelFormat::R16_UNORM: | ||
| 87 | case PixelFormat::R16_FLOAT: | ||
| 88 | params.pixel_format = PixelFormat::D16_UNORM; | ||
| 89 | break; | ||
| 90 | case PixelFormat::R32_FLOAT: | ||
| 91 | params.pixel_format = PixelFormat::D32_FLOAT; | ||
| 92 | break; | ||
| 93 | default: | ||
| 94 | UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", | ||
| 95 | static_cast<u32>(params.pixel_format)); | ||
| 96 | } | ||
| 97 | params.type = GetFormatType(params.pixel_format); | ||
| 98 | } | ||
| 99 | // TODO: on 1DBuffer we should use the tic info. | ||
| 100 | if (tic.IsBuffer()) { | ||
| 101 | params.target = SurfaceTarget::TextureBuffer; | ||
| 102 | params.width = tic.Width(); | ||
| 103 | params.pitch = params.width * params.GetBytesPerPixel(); | ||
| 104 | params.height = 1; | ||
| 105 | params.depth = 1; | ||
| 106 | params.num_levels = 1; | ||
| 107 | params.emulated_levels = 1; | ||
| 108 | params.is_layered = false; | ||
| 109 | } else { | ||
| 110 | params.target = TextureTypeToSurfaceTarget(entry.type, entry.is_array); | ||
| 111 | params.width = tic.Width(); | ||
| 112 | params.height = tic.Height(); | ||
| 113 | params.depth = tic.Depth(); | ||
| 114 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); | ||
| 115 | if (params.target == SurfaceTarget::TextureCubemap || | ||
| 116 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 117 | params.depth *= 6; | ||
| 118 | } | ||
| 119 | params.num_levels = tic.max_mip_level + 1; | ||
| 120 | params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); | ||
| 121 | params.is_layered = params.IsLayered(); | ||
| 122 | } | ||
| 123 | return params; | ||
| 124 | } | ||
| 125 | |||
| 126 | SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table, | ||
| 127 | const Tegra::Texture::TICEntry& tic, | ||
| 128 | const VideoCommon::Shader::Image& entry) { | ||
| 129 | SurfaceParams params; | ||
| 130 | params.is_tiled = tic.IsTiled(); | ||
| 131 | params.srgb_conversion = tic.IsSrgbConversionEnabled(); | ||
| 132 | params.block_width = params.is_tiled ? tic.BlockWidth() : 0; | ||
| 133 | params.block_height = params.is_tiled ? tic.BlockHeight() : 0; | ||
| 134 | params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; | ||
| 135 | params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; | ||
| 136 | params.pixel_format = lookup_table.GetPixelFormat( | ||
| 137 | tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); | ||
| 138 | params.type = GetFormatType(params.pixel_format); | ||
| 139 | params.target = ImageTypeToSurfaceTarget(entry.type); | ||
| 140 | // TODO: on 1DBuffer we should use the tic info. | ||
| 141 | if (tic.IsBuffer()) { | ||
| 142 | params.target = SurfaceTarget::TextureBuffer; | ||
| 143 | params.width = tic.Width(); | ||
| 144 | params.pitch = params.width * params.GetBytesPerPixel(); | ||
| 145 | params.height = 1; | ||
| 146 | params.depth = 1; | ||
| 147 | params.num_levels = 1; | ||
| 148 | params.emulated_levels = 1; | ||
| 149 | params.is_layered = false; | ||
| 150 | } else { | ||
| 151 | params.width = tic.Width(); | ||
| 152 | params.height = tic.Height(); | ||
| 153 | params.depth = tic.Depth(); | ||
| 154 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); | ||
| 155 | if (params.target == SurfaceTarget::TextureCubemap || | ||
| 156 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 157 | params.depth *= 6; | ||
| 158 | } | ||
| 159 | params.num_levels = tic.max_mip_level + 1; | ||
| 160 | params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); | ||
| 161 | params.is_layered = params.IsLayered(); | ||
| 162 | } | ||
| 163 | return params; | ||
| 164 | } | ||
| 165 | |||
| 166 | SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) { | ||
| 167 | const auto& regs = maxwell3d.regs; | ||
| 168 | const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); | ||
| 169 | const bool is_layered = regs.zeta_layers > 1 && block_depth == 0; | ||
| 170 | const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); | ||
| 171 | return { | ||
| 172 | .is_tiled = regs.zeta.memory_layout.type == | ||
| 173 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear, | ||
| 174 | .srgb_conversion = false, | ||
| 175 | .is_layered = is_layered, | ||
| 176 | .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U), | ||
| 177 | .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U), | ||
| 178 | .block_depth = block_depth, | ||
| 179 | .tile_width_spacing = 1, | ||
| 180 | .width = regs.zeta_width, | ||
| 181 | .height = regs.zeta_height, | ||
| 182 | .depth = is_layered ? regs.zeta_layers.Value() : 1U, | ||
| 183 | .pitch = 0, | ||
| 184 | .num_levels = 1, | ||
| 185 | .emulated_levels = 1, | ||
| 186 | .pixel_format = pixel_format, | ||
| 187 | .type = GetFormatType(pixel_format), | ||
| 188 | .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D, | ||
| 189 | }; | ||
| 190 | } | ||
| 191 | |||
| 192 | SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, | ||
| 193 | std::size_t index) { | ||
| 194 | const auto& config{maxwell3d.regs.rt[index]}; | ||
| 195 | SurfaceParams params; | ||
| 196 | params.is_tiled = | ||
| 197 | config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | ||
| 198 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || | ||
| 199 | config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB; | ||
| 200 | params.block_width = config.memory_layout.block_width; | ||
| 201 | params.block_height = config.memory_layout.block_height; | ||
| 202 | params.block_depth = config.memory_layout.block_depth; | ||
| 203 | params.tile_width_spacing = 1; | ||
| 204 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 205 | params.type = GetFormatType(params.pixel_format); | ||
| 206 | if (params.is_tiled) { | ||
| 207 | params.pitch = 0; | ||
| 208 | params.width = config.width; | ||
| 209 | } else { | ||
| 210 | const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; | ||
| 211 | params.pitch = config.width; | ||
| 212 | params.width = params.pitch / bpp; | ||
| 213 | } | ||
| 214 | params.height = config.height; | ||
| 215 | params.num_levels = 1; | ||
| 216 | params.emulated_levels = 1; | ||
| 217 | |||
| 218 | if (config.memory_layout.is_3d != 0) { | ||
| 219 | params.depth = config.layers.Value(); | ||
| 220 | params.is_layered = false; | ||
| 221 | params.target = SurfaceTarget::Texture3D; | ||
| 222 | } else if (config.layers > 1) { | ||
| 223 | params.depth = config.layers.Value(); | ||
| 224 | params.is_layered = true; | ||
| 225 | params.target = SurfaceTarget::Texture2DArray; | ||
| 226 | } else { | ||
| 227 | params.depth = 1; | ||
| 228 | params.is_layered = false; | ||
| 229 | params.target = SurfaceTarget::Texture2D; | ||
| 230 | } | ||
| 231 | return params; | ||
| 232 | } | ||
| 233 | |||
| 234 | SurfaceParams SurfaceParams::CreateForFermiCopySurface( | ||
| 235 | const Tegra::Engines::Fermi2D::Regs::Surface& config) { | ||
| 236 | const bool is_tiled = !config.linear; | ||
| 237 | const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 238 | |||
| 239 | SurfaceParams params{ | ||
| 240 | .is_tiled = is_tiled, | ||
| 241 | .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || | ||
| 242 | config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB, | ||
| 243 | .is_layered = false, | ||
| 244 | .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U, | ||
| 245 | .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U, | ||
| 246 | .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U, | ||
| 247 | .tile_width_spacing = 1, | ||
| 248 | .width = config.width, | ||
| 249 | .height = config.height, | ||
| 250 | .depth = 1, | ||
| 251 | .pitch = config.pitch, | ||
| 252 | .num_levels = 1, | ||
| 253 | .emulated_levels = 1, | ||
| 254 | .pixel_format = pixel_format, | ||
| 255 | .type = GetFormatType(pixel_format), | ||
| 256 | // TODO(Rodrigo): Try to guess texture arrays from parameters | ||
| 257 | .target = SurfaceTarget::Texture2D, | ||
| 258 | }; | ||
| 259 | |||
| 260 | params.is_layered = params.IsLayered(); | ||
| 261 | return params; | ||
| 262 | } | ||
| 263 | |||
| 264 | VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget( | ||
| 265 | const VideoCommon::Shader::Sampler& entry) { | ||
| 266 | return TextureTypeToSurfaceTarget(entry.type, entry.is_array); | ||
| 267 | } | ||
| 268 | |||
| 269 | VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget( | ||
| 270 | const VideoCommon::Shader::Image& entry) { | ||
| 271 | return ImageTypeToSurfaceTarget(entry.type); | ||
| 272 | } | ||
| 273 | |||
| 274 | bool SurfaceParams::IsLayered() const { | ||
| 275 | switch (target) { | ||
| 276 | case SurfaceTarget::Texture1DArray: | ||
| 277 | case SurfaceTarget::Texture2DArray: | ||
| 278 | case SurfaceTarget::TextureCubemap: | ||
| 279 | case SurfaceTarget::TextureCubeArray: | ||
| 280 | return true; | ||
| 281 | default: | ||
| 282 | return false; | ||
| 283 | } | ||
| 284 | } | ||
| 285 | |||
| 286 | // Auto block resizing algorithm from: | ||
| 287 | // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c | ||
| 288 | u32 SurfaceParams::GetMipBlockHeight(u32 level) const { | ||
| 289 | if (level == 0) { | ||
| 290 | return this->block_height; | ||
| 291 | } | ||
| 292 | |||
| 293 | const u32 height_new{GetMipHeight(level)}; | ||
| 294 | const u32 default_block_height{GetDefaultBlockHeight()}; | ||
| 295 | const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height}; | ||
| 296 | const u32 block_height_new = Common::Log2Ceil32(blocks_in_y); | ||
| 297 | return std::clamp(block_height_new, 3U, 7U) - 3U; | ||
| 298 | } | ||
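The clamp above picks the per-mip block height as a log2 GOB count in [0, 4], i.e. 1 to 16 GOBs. A minimal standalone sketch of that selection, assuming Common::Log2Ceil32 behaves like the bit-width stand-in below:

    #include <algorithm>
    #include <bit>
    #include <cstdint>

    // Assumed stand-in for Common::Log2Ceil32 (ceil(log2(value)), 0 for value <= 1).
    std::uint32_t Log2Ceil32(std::uint32_t value) {
        return value <= 1 ? 0 : static_cast<std::uint32_t>(std::bit_width(value - 1));
    }

    // Mirrors GetMipBlockHeight for level > 0: log2 of the GOBs per block.
    std::uint32_t MipBlockHeight(std::uint32_t mip_height, std::uint32_t default_block_height) {
        const std::uint32_t blocks_in_y =
            (mip_height + default_block_height - 1) / default_block_height;
        return std::clamp(Log2Ceil32(blocks_in_y), 3U, 7U) - 3U;
    }

    // Example: a 300-texel-tall RGBA8 mip (default block height 1) gives
    // Log2Ceil32(300) = 9, clamped to 7, minus 3 = 4, i.e. 16 GOBs per block.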
| 299 | |||
| 300 | u32 SurfaceParams::GetMipBlockDepth(u32 level) const { | ||
| 301 | if (level == 0) { | ||
| 302 | return this->block_depth; | ||
| 303 | } | ||
| 304 | if (is_layered) { | ||
| 305 | return 0; | ||
| 306 | } | ||
| 307 | |||
| 308 | const u32 depth_new{GetMipDepth(level)}; | ||
| 309 | const u32 block_depth_new = Common::Log2Ceil32(depth_new); | ||
| 310 | if (block_depth_new > 4) { | ||
| 311 | return 5 - (GetMipBlockHeight(level) >= 2); | ||
| 312 | } | ||
| 313 | return block_depth_new; | ||
| 314 | } | ||
| 315 | |||
| 316 | std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { | ||
| 317 | std::size_t offset = 0; | ||
| 318 | for (u32 i = 0; i < level; i++) { | ||
| 319 | offset += GetInnerMipmapMemorySize(i, false, false); | ||
| 320 | } | ||
| 321 | return offset; | ||
| 322 | } | ||
| 323 | |||
| 324 | std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const { | ||
| 325 | std::size_t offset = 0; | ||
| 326 | if (is_converted) { | ||
| 327 | for (u32 i = 0; i < level; ++i) { | ||
| 328 | offset += GetConvertedMipmapSize(i) * GetNumLayers(); | ||
| 329 | } | ||
| 330 | } else { | ||
| 331 | for (u32 i = 0; i < level; ++i) { | ||
| 332 | offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); | ||
| 333 | } | ||
| 334 | } | ||
| 335 | return offset; | ||
| 336 | } | ||
| 337 | |||
| 338 | std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { | ||
| 339 | constexpr std::size_t rgba8_bpp = 4ULL; | ||
| 340 | const std::size_t mip_width = GetMipWidth(level); | ||
| 341 | const std::size_t mip_height = GetMipHeight(level); | ||
| 342 | const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level); | ||
| 343 | return mip_width * mip_height * mip_depth * rgba8_bpp; | ||
| 344 | } | ||
| 345 | |||
| 346 | std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { | ||
| 347 | std::size_t size = 0; | ||
| 348 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 349 | size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); | ||
| 350 | } | ||
| 351 | if (is_tiled && is_layered) { | ||
| 352 | return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); | ||
| 353 | } | ||
| 354 | return size; | ||
| 355 | } | ||
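The AlignBits call above pads a tiled, layered layer out to a whole block. A sketch of the assumed helper semantics (the real implementation lives in common/alignment.h):

    #include <cstddef>

    // Assumed behaviour of Common::AlignBits: round value up to a multiple of 1 << bits.
    constexpr std::size_t AlignBits(std::size_t value, std::size_t bits) {
        return (value + ((std::size_t{1} << bits) - 1)) >> bits << bits;
    }

    // With GOB_SIZE_SHIFT = 9 (512-byte GOBs), block_height = 4 and block_depth = 0,
    // layers are padded to multiples of 1 << (9 + 4 + 0) = 8 KiB.
    static_assert(AlignBits(20000, 13) == 24576);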
| 356 | |||
| 357 | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, | ||
| 358 | bool uncompressed) const { | ||
| 359 | const u32 mip_width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; | ||
| 360 | const u32 mip_height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; | ||
| 361 | const u32 mip_depth{is_layered ? 1U : GetMipDepth(level)}; | ||
| 362 | if (is_tiled) { | ||
| 363 | return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), mip_width, | ||
| 364 | mip_height, mip_depth, GetMipBlockHeight(level), | ||
| 365 | GetMipBlockDepth(level)); | ||
| 366 | } else if (as_host_size || IsBuffer()) { | ||
| 367 | return GetBytesPerPixel() * mip_width * mip_height * mip_depth; | ||
| 368 | } else { | ||
| 369 | // Linear Texture Case | ||
| 370 | return pitch * mip_height * mip_depth; | ||
| 371 | } | ||
| 372 | } | ||
| 373 | |||
| 374 | bool SurfaceParams::operator==(const SurfaceParams& rhs) const { | ||
| 375 | return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, | ||
| 376 | height, depth, pitch, num_levels, pixel_format, type, target) == | ||
| 377 | std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, | ||
| 378 | rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, | ||
| 379 | rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target); | ||
| 380 | } | ||
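The std::tie comparison above is the standard idiom for member-wise equality: tuples of references compare lexicographically, so the member list lives in one place instead of a hand-written chain of &&. A minimal illustration with a hypothetical two-field struct:

    #include <tuple>

    struct Extent2D {
        unsigned width;
        unsigned height;
    };

    bool operator==(const Extent2D& lhs, const Extent2D& rhs) {
        // std::tie builds tuples of references, compared element-wise.
        return std::tie(lhs.width, lhs.height) == std::tie(rhs.width, rhs.height);
    }

Note that the SurfaceParams comparison leaves out srgb_conversion, is_layered and emulated_levels: the latter two are derived from fields that are compared, and sRGB selection is effectively folded into pixel_format by the format lookup.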
| 381 | |||
| 382 | std::string SurfaceParams::TargetName() const { | ||
| 383 | switch (target) { | ||
| 384 | case SurfaceTarget::Texture1D: | ||
| 385 | return "1D"; | ||
| 386 | case SurfaceTarget::TextureBuffer: | ||
| 387 | return "TexBuffer"; | ||
| 388 | case SurfaceTarget::Texture2D: | ||
| 389 | return "2D"; | ||
| 390 | case SurfaceTarget::Texture3D: | ||
| 391 | return "3D"; | ||
| 392 | case SurfaceTarget::Texture1DArray: | ||
| 393 | return "1DArray"; | ||
| 394 | case SurfaceTarget::Texture2DArray: | ||
| 395 | return "2DArray"; | ||
| 396 | case SurfaceTarget::TextureCubemap: | ||
| 397 | return "Cube"; | ||
| 398 | case SurfaceTarget::TextureCubeArray: | ||
| 399 | return "CubeArray"; | ||
| 400 | default: | ||
| 401 | LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target); | ||
| 402 | UNREACHABLE(); | ||
| 403 | return fmt::format("TUK({})", target); | ||
| 404 | } | ||
| 405 | } | ||
| 406 | |||
| 407 | u32 SurfaceParams::GetBlockSize() const { | ||
| 408 | const u32 x = 64U << block_width; | ||
| 409 | const u32 y = 8U << block_height; | ||
| 410 | const u32 z = 1U << block_depth; | ||
| 411 | return x * y * z; | ||
| 412 | } | ||
| 413 | |||
| 414 | std::pair<u32, u32> SurfaceParams::GetBlockXY() const { | ||
| 415 | const u32 x_pixels = 64U / GetBytesPerPixel(); | ||
| 416 | const u32 x = x_pixels << block_width; | ||
| 417 | const u32 y = 8U << block_height; | ||
| 418 | return {x, y}; | ||
| 419 | } | ||
| 420 | |||
| 421 | std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const { | ||
| 422 | const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | ||
| 423 | const u32 block_size = GetBlockSize(); | ||
| 424 | const u32 block_index = offset / block_size; | ||
| 425 | const u32 gob_offset = offset % block_size; | ||
| 426 | const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE); | ||
| 427 | const u32 x_gob_pixels = 64U / GetBytesPerPixel(); | ||
| 428 | const u32 x_block_pixels = x_gob_pixels << block_width; | ||
| 429 | const u32 y_block_pixels = 8U << block_height; | ||
| 430 | const u32 z_block_pixels = 1U << block_depth; | ||
| 431 | const u32 x_blocks = div_ceil(width, x_block_pixels); | ||
| 432 | const u32 y_blocks = div_ceil(height, y_block_pixels); | ||
| 433 | const u32 z_blocks = div_ceil(depth, z_block_pixels); | ||
| 434 | const u32 base_x = block_index % x_blocks; | ||
| 435 | const u32 base_y = (block_index / x_blocks) % y_blocks; | ||
| 436 | const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks; | ||
| 437 | u32 x = base_x * x_block_pixels; | ||
| 438 | u32 y = base_y * y_block_pixels; | ||
| 439 | u32 z = base_z * z_block_pixels; | ||
| 440 | z += gob_index >> block_height; | ||
| 441 | y += (gob_index * 8U) % y_block_pixels; | ||
| 442 | return {x, y, z}; | ||
| 443 | } | ||
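A worked instance of the decomposition above, under assumed parameters of 4 bytes per texel (so 16 texels per GOB row), block_width = 0, block_height = 4 and block_depth = 0:

    #include <cstdint>
    using u32 = std::uint32_t; // matching the common_types alias

    // GetBlockSize() = (64 << 0) * (8 << 4) * (1 << 0) = 8192 bytes per block.
    constexpr u32 block_size = 8192;
    constexpr u32 offset = 20000;
    constexpr u32 block_index = offset / block_size;   // 2
    constexpr u32 gob_offset = offset % block_size;    // 3616
    constexpr u32 gob_index = gob_offset / 512;        // GOB_SIZE = 512 -> 7
    constexpr u32 z_step = gob_index >> 4;             // gob_index >> block_height = 0
    constexpr u32 y_step = (gob_index * 8) % (8 << 4); // 7 GOBs * 8 rows = 56 texels down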
| 444 | |||
| 445 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h deleted file mode 100644 index 4466c3c34..000000000 --- a/src/video_core/texture_cache/surface_params.h +++ /dev/null | |||
| @@ -1,294 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <utility> | ||
| 8 | |||
| 9 | #include "common/alignment.h" | ||
| 10 | #include "common/bit_util.h" | ||
| 11 | #include "common/cityhash.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/engines/fermi_2d.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | #include "video_core/surface.h" | ||
| 17 | #include "video_core/textures/decoders.h" | ||
| 18 | |||
| 19 | namespace VideoCommon { | ||
| 20 | |||
| 21 | class FormatLookupTable; | ||
| 22 | |||
| 23 | class SurfaceParams { | ||
| 24 | public: | ||
| 25 | /// Creates SurfaceParams from a texture configuration. | ||
| 26 | static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table, | ||
| 27 | const Tegra::Texture::TICEntry& tic, | ||
| 28 | const VideoCommon::Shader::Sampler& entry); | ||
| 29 | |||
| 30 | /// Creates SurfaceParams from an image configuration. | ||
| 31 | static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table, | ||
| 32 | const Tegra::Texture::TICEntry& tic, | ||
| 33 | const VideoCommon::Shader::Image& entry); | ||
| 34 | |||
| 35 | /// Creates SurfaceParams for a depth buffer configuration. | ||
| 36 | static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d); | ||
| 37 | |||
| 38 | /// Creates SurfaceParams from a framebuffer configuration. | ||
| 39 | static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, | ||
| 40 | std::size_t index); | ||
| 41 | |||
| 42 | /// Creates SurfaceParams from a Fermi2D surface configuration. | ||
| 43 | static SurfaceParams CreateForFermiCopySurface( | ||
| 44 | const Tegra::Engines::Fermi2D::Regs::Surface& config); | ||
| 45 | |||
| 46 | /// Obtains the texture target from a shader's sampler entry. | ||
| 47 | static VideoCore::Surface::SurfaceTarget ExpectedTarget( | ||
| 48 | const VideoCommon::Shader::Sampler& entry); | ||
| 49 | |||
| 50 | /// Obtains the texture target from a shader's image entry. | ||
| 51 | static VideoCore::Surface::SurfaceTarget ExpectedTarget( | ||
| 52 | const VideoCommon::Shader::Image& entry); | ||
| 53 | |||
| 54 | std::size_t Hash() const { | ||
| 55 | return static_cast<std::size_t>( | ||
| 56 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this))); | ||
| 57 | } | ||
| 58 | |||
| 59 | bool operator==(const SurfaceParams& rhs) const; | ||
| 60 | |||
| 61 | bool operator!=(const SurfaceParams& rhs) const { | ||
| 62 | return !operator==(rhs); | ||
| 63 | } | ||
| 64 | |||
| 65 | std::size_t GetGuestSizeInBytes() const { | ||
| 66 | return GetInnerMemorySize(false, false, false); | ||
| 67 | } | ||
| 68 | |||
| 69 | std::size_t GetHostSizeInBytes(bool is_converted) const { | ||
| 70 | if (!is_converted) { | ||
| 71 | return GetInnerMemorySize(true, false, false); | ||
| 72 | } | ||
| 73 | // ASTC is decompressed in software and emulated as RGBA8 | ||
| 74 | std::size_t host_size_in_bytes = 0; | ||
| 75 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 76 | host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers(); | ||
| 77 | } | ||
| 78 | return host_size_in_bytes; | ||
| 79 | } | ||
| 80 | |||
| 81 | u32 GetBlockAlignedWidth() const { | ||
| 82 | return Common::AlignUp(width, 64 / GetBytesPerPixel()); | ||
| 83 | } | ||
| 84 | |||
| 85 | /// Returns the width of a given mipmap level. | ||
| 86 | u32 GetMipWidth(u32 level) const { | ||
| 87 | return std::max(1U, width >> level); | ||
| 88 | } | ||
| 89 | |||
| 90 | /// Returns the height of a given mipmap level. | ||
| 91 | u32 GetMipHeight(u32 level) const { | ||
| 92 | return std::max(1U, height >> level); | ||
| 93 | } | ||
| 94 | |||
| 95 | /// Returns the depth of a given mipmap level. | ||
| 96 | u32 GetMipDepth(u32 level) const { | ||
| 97 | return is_layered ? depth : std::max(1U, depth >> level); | ||
| 98 | } | ||
| 99 | |||
| 100 | /// Returns the block height of a given mipmap level. | ||
| 101 | u32 GetMipBlockHeight(u32 level) const; | ||
| 102 | |||
| 103 | /// Returns the block depth of a given mipmap level. | ||
| 104 | u32 GetMipBlockDepth(u32 level) const; | ||
| 105 | |||
| 106 | /// Returns the best possible row/pitch alignment for the surface. | ||
| 107 | u32 GetRowAlignment(u32 level, bool is_converted) const { | ||
| 108 | const u32 bpp = is_converted ? 4 : GetBytesPerPixel(); | ||
| 109 | return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); | ||
| 110 | } | ||
| 111 | |||
| 112 | /// Returns the offset in bytes in guest memory of a given mipmap level. | ||
| 113 | std::size_t GetGuestMipmapLevelOffset(u32 level) const; | ||
| 114 | |||
| 115 | /// Returns the offset in bytes in host memory (linear) of a given mipmap level. | ||
| 116 | std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const; | ||
| 117 | |||
| 118 | /// Returns the size in bytes in guest memory of a given mipmap level. | ||
| 119 | std::size_t GetGuestMipmapSize(u32 level) const { | ||
| 120 | return GetInnerMipmapMemorySize(level, false, false); | ||
| 121 | } | ||
| 122 | |||
| 123 | /// Returns the size in bytes in host memory (linear) of a given mipmap level. | ||
| 124 | std::size_t GetHostMipmapSize(u32 level) const { | ||
| 125 | return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); | ||
| 126 | } | ||
| 127 | |||
| 128 | std::size_t GetConvertedMipmapSize(u32 level) const; | ||
| 129 | |||
| 130 | /// Gets this texture's Tegra block size in the guest memory layout. | ||
| 131 | u32 GetBlockSize() const; | ||
| 132 | |||
| 133 | /// Gets the maximum X and Y texel sizes of a single block. | ||
| 134 | std::pair<u32, u32> GetBlockXY() const; | ||
| 135 | |||
| 136 | /// Gets the x, y, z texel coordinates corresponding to a guest memory offset. | ||
| 137 | std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const; | ||
| 138 | |||
| 139 | /// Returns the size of a layer in bytes in guest memory. | ||
| 140 | std::size_t GetGuestLayerSize() const { | ||
| 141 | return GetLayerSize(false, false); | ||
| 142 | } | ||
| 143 | |||
| 144 | /// Returns the size of a layer in bytes in host memory for a given mipmap level. | ||
| 145 | std::size_t GetHostLayerSize(u32 level) const { | ||
| 146 | ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D); | ||
| 147 | return GetInnerMipmapMemorySize(level, true, false); | ||
| 148 | } | ||
| 149 | |||
| 150 | /// Returns the maximum number of mipmap levels the texture can have on the host GPU. | ||
| 151 | u32 MaxPossibleMipmap() const { | ||
| 152 | const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; | ||
| 153 | const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; | ||
| 154 | const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h); | ||
| 155 | if (target != VideoCore::Surface::SurfaceTarget::Texture3D) | ||
| 156 | return max_mipmap; | ||
| 157 | return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); | ||
| 158 | } | ||
| 159 | |||
| 160 | /// Returns true if the guest surface is a compressed surface. | ||
| 161 | bool IsCompressed() const { | ||
| 162 | return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; | ||
| 163 | } | ||
| 164 | |||
| 165 | /// Returns the default block width. | ||
| 166 | u32 GetDefaultBlockWidth() const { | ||
| 167 | return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); | ||
| 168 | } | ||
| 169 | |||
| 170 | /// Returns the default block height. | ||
| 171 | u32 GetDefaultBlockHeight() const { | ||
| 172 | return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); | ||
| 173 | } | ||
| 174 | |||
| 175 | /// Returns the bits per pixel. | ||
| 176 | u32 GetBitsPerPixel() const { | ||
| 177 | return VideoCore::Surface::GetFormatBpp(pixel_format); | ||
| 178 | } | ||
| 179 | |||
| 180 | /// Returns the bytes per pixel. | ||
| 181 | u32 GetBytesPerPixel() const { | ||
| 182 | return VideoCore::Surface::GetBytesPerPixel(pixel_format); | ||
| 183 | } | ||
| 184 | |||
| 185 | /// Returns true if the pixel format is a depth and/or stencil format. | ||
| 186 | bool IsPixelFormatZeta() const { | ||
| 187 | return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && | ||
| 188 | pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; | ||
| 189 | } | ||
| 190 | |||
| 191 | /// Returns true if the surface is a TextureBuffer type of surface. | ||
| 192 | bool IsBuffer() const { | ||
| 193 | return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; | ||
| 194 | } | ||
| 195 | |||
| 196 | /// Returns the number of layers in the surface. | ||
| 197 | std::size_t GetNumLayers() const { | ||
| 198 | return is_layered ? depth : 1; | ||
| 199 | } | ||
| 200 | |||
| 201 | /// Returns the debug name of the texture for use in graphic debuggers. | ||
| 202 | std::string TargetName() const; | ||
| 203 | |||
| 204 | // Helper used for out-of-class size calculations | ||
| 205 | static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, | ||
| 206 | const u32 block_depth) { | ||
| 207 | return Common::AlignBits(out_size, | ||
| 208 | Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); | ||
| 209 | } | ||
| 210 | |||
| 211 | /// Converts a width from one surface format into another. This helps represent the | ||
| 212 | /// equivalent value between compressed and non-compressed textures. | ||
| 213 | static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, | ||
| 214 | VideoCore::Surface::PixelFormat pixel_format_to) { | ||
| 215 | const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); | ||
| 216 | const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); | ||
| 217 | return (width * bw2 + bw1 - 1) / bw1; | ||
| 218 | } | ||
| 219 | |||
| 220 | /// Converts a height from one surface format into another. This helps represent the | ||
| 221 | /// equivalent value between compressed and non-compressed textures. | ||
| 222 | static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, | ||
| 223 | VideoCore::Surface::PixelFormat pixel_format_to) { | ||
| 224 | const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); | ||
| 225 | const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); | ||
| 226 | return (height * bh2 + bh1 - 1) / bh1; | ||
| 227 | } | ||
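Both conversions are ceiling divisions through block space. For instance, converting a 60-texel width from a BC-style compressed format (block width 4) into RGBA8 (block width 1):

    // (width * bw_to + bw_from - 1) / bw_from with bw_from = 4, bw_to = 1:
    constexpr unsigned converted_width = (60 * 1 + 4 - 1) / 4; // 15 destination texels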
| 228 | |||
| 229 | // Finds the maximum possible width between two 2D layers of different formats | ||
| 230 | static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, | ||
| 231 | const u32 src_level, const u32 dst_level) { | ||
| 232 | const u32 bw1 = src_params.GetDefaultBlockWidth(); | ||
| 233 | const u32 bw2 = dst_params.GetDefaultBlockWidth(); | ||
| 234 | const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; | ||
| 235 | const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; | ||
| 236 | return std::min(t_src_width, t_dst_width); | ||
| 237 | } | ||
| 238 | |||
| 239 | // Finds the maximum possible height between two 2D layers of different formats | ||
| 240 | static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, | ||
| 241 | const u32 src_level, const u32 dst_level) { | ||
| 242 | const u32 bh1 = src_params.GetDefaultBlockHeight(); | ||
| 243 | const u32 bh2 = dst_params.GetDefaultBlockHeight(); | ||
| 244 | const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; | ||
| 245 | const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; | ||
| 246 | return std::min(t_src_height, t_dst_height); | ||
| 247 | } | ||
| 248 | |||
| 249 | bool is_tiled; | ||
| 250 | bool srgb_conversion; | ||
| 251 | bool is_layered; | ||
| 252 | u32 block_width; | ||
| 253 | u32 block_height; | ||
| 254 | u32 block_depth; | ||
| 255 | u32 tile_width_spacing; | ||
| 256 | u32 width; | ||
| 257 | u32 height; | ||
| 258 | u32 depth; | ||
| 259 | u32 pitch; | ||
| 260 | u32 num_levels; | ||
| 261 | u32 emulated_levels; | ||
| 262 | VideoCore::Surface::PixelFormat pixel_format; | ||
| 263 | VideoCore::Surface::SurfaceType type; | ||
| 264 | VideoCore::Surface::SurfaceTarget target; | ||
| 265 | |||
| 266 | private: | ||
| 267 | /// Returns the size of a given mipmap level inside a layer. | ||
| 268 | std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; | ||
| 269 | |||
| 270 | /// Returns the size of all mipmap levels and aligns as needed. | ||
| 271 | std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { | ||
| 272 | return GetLayerSize(as_host_size, uncompressed) * | ||
| 273 | (layer_only ? 1U : (is_layered ? depth : 1U)); | ||
| 274 | } | ||
| 275 | |||
| 276 | /// Returns the size of a layer | ||
| 277 | std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; | ||
| 278 | |||
| 279 | /// Returns true if these parameters are from a layered surface. | ||
| 280 | bool IsLayered() const; | ||
| 281 | }; | ||
| 282 | |||
| 283 | } // namespace VideoCommon | ||
| 284 | |||
| 285 | namespace std { | ||
| 286 | |||
| 287 | template <> | ||
| 288 | struct hash<VideoCommon::SurfaceParams> { | ||
| 289 | std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept { | ||
| 290 | return k.Hash(); | ||
| 291 | } | ||
| 292 | }; | ||
| 293 | |||
| 294 | } // namespace std | ||
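The std::hash specialization above forwards to the CityHash64-based SurfaceParams::Hash, which hashes the raw object bytes; byte-wise hashing like this is only reliable when padding bytes are deterministic, which is worth keeping in mind for structs mixing bool and u32 members. A hypothetical usage sketch (SurfaceEntry is a stand-in, not a type from this file):

    #include <unordered_map>
    #include "video_core/texture_cache/surface_params.h"

    struct SurfaceEntry; // stand-in for whatever a concrete cache stores

    // SurfaceParams can key standard containers directly thanks to std::hash above.
    std::unordered_map<VideoCommon::SurfaceParams, SurfaceEntry*> surface_lookup;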
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp deleted file mode 100644 index 6b5f5984b..000000000 --- a/src/video_core/texture_cache/surface_view.cpp +++ /dev/null | |||
| @@ -1,27 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/texture_cache/surface_view.h" | ||
| 9 | |||
| 10 | namespace VideoCommon { | ||
| 11 | |||
| 12 | std::size_t ViewParams::Hash() const { | ||
| 13 | return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^ | ||
| 14 | (static_cast<std::size_t>(base_level) << 24) ^ | ||
| 15 | (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36); | ||
| 16 | } | ||
| 17 | |||
| 18 | bool ViewParams::operator==(const ViewParams& rhs) const { | ||
| 19 | return std::tie(base_layer, num_layers, base_level, num_levels, target) == | ||
| 20 | std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); | ||
| 21 | } | ||
| 22 | |||
| 23 | bool ViewParams::operator!=(const ViewParams& rhs) const { | ||
| 24 | return !operator==(rhs); | ||
| 25 | } | ||
| 26 | |||
| 27 | } // namespace VideoCommon | ||
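One caveat in ViewParams::Hash: the shifts by 32 and 36 assume a 64-bit std::size_t and would be undefined behaviour on a 32-bit target. A defensive sketch that widens explicitly before shifting (Hash64 is a hypothetical name, not part of this file):

    #include <cstddef>
    #include <cstdint>

    std::size_t Hash64(std::uint32_t base_layer, std::uint32_t num_layers,
                       std::uint32_t base_level, std::uint32_t num_levels, std::uint32_t target) {
        const std::uint64_t h = std::uint64_t{base_layer} ^ (std::uint64_t{num_layers} << 16) ^
                                (std::uint64_t{base_level} << 24) ^
                                (std::uint64_t{num_levels} << 32) ^ (std::uint64_t{target} << 36);
        // Fold the high half down so a 32-bit size_t still sees every field.
        return static_cast<std::size_t>(h ^ (h >> 32));
    }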
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h deleted file mode 100644 index 199f72732..000000000 --- a/src/video_core/texture_cache/surface_view.h +++ /dev/null | |||
| @@ -1,68 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <functional> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/texture_cache/surface_params.h" | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | struct ViewParams { | ||
| 16 | constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target_, u32 base_layer_, | ||
| 17 | u32 num_layers_, u32 base_level_, u32 num_levels_) | ||
| 18 | : target{target_}, base_layer{base_layer_}, num_layers{num_layers_}, | ||
| 19 | base_level{base_level_}, num_levels{num_levels_} {} | ||
| 20 | |||
| 21 | std::size_t Hash() const; | ||
| 22 | |||
| 23 | bool operator==(const ViewParams& rhs) const; | ||
| 24 | bool operator!=(const ViewParams& rhs) const; | ||
| 25 | |||
| 26 | bool IsLayered() const { | ||
| 27 | switch (target) { | ||
| 28 | case VideoCore::Surface::SurfaceTarget::Texture1DArray: | ||
| 29 | case VideoCore::Surface::SurfaceTarget::Texture2DArray: | ||
| 30 | case VideoCore::Surface::SurfaceTarget::TextureCubemap: | ||
| 31 | case VideoCore::Surface::SurfaceTarget::TextureCubeArray: | ||
| 32 | return true; | ||
| 33 | default: | ||
| 34 | return false; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | VideoCore::Surface::SurfaceTarget target{}; | ||
| 39 | u32 base_layer{}; | ||
| 40 | u32 num_layers{}; | ||
| 41 | u32 base_level{}; | ||
| 42 | u32 num_levels{}; | ||
| 43 | }; | ||
| 44 | |||
| 45 | class ViewBase { | ||
| 46 | public: | ||
| 47 | constexpr explicit ViewBase(const ViewParams& view_params) : params{view_params} {} | ||
| 48 | |||
| 49 | constexpr const ViewParams& GetViewParams() const { | ||
| 50 | return params; | ||
| 51 | } | ||
| 52 | |||
| 53 | protected: | ||
| 54 | ViewParams params; | ||
| 55 | }; | ||
| 56 | |||
| 57 | } // namespace VideoCommon | ||
| 58 | |||
| 59 | namespace std { | ||
| 60 | |||
| 61 | template <> | ||
| 62 | struct hash<VideoCommon::ViewParams> { | ||
| 63 | std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept { | ||
| 64 | return k.Hash(); | ||
| 65 | } | ||
| 66 | }; | ||
| 67 | |||
| 68 | } // namespace std | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 581d8dd5b..d1080300f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -6,1298 +6,1454 @@ | |||
| 6 | 6 | ||
| 7 | #include <algorithm> | 7 | #include <algorithm> |
| 8 | #include <array> | 8 | #include <array> |
| 9 | #include <list> | 9 | #include <bit> |
| 10 | #include <memory> | 10 | #include <memory> |
| 11 | #include <mutex> | 11 | #include <mutex> |
| 12 | #include <set> | 12 | #include <optional> |
| 13 | #include <tuple> | 13 | #include <span> |
| 14 | #include <type_traits> | ||
| 14 | #include <unordered_map> | 15 | #include <unordered_map> |
| 16 | #include <utility> | ||
| 15 | #include <vector> | 17 | #include <vector> |
| 16 | 18 | ||
| 17 | #include <boost/container/small_vector.hpp> | 19 | #include <boost/container/small_vector.hpp> |
| 18 | #include <boost/icl/interval_map.hpp> | ||
| 19 | #include <boost/range/iterator_range.hpp> | ||
| 20 | 20 | ||
| 21 | #include "common/assert.h" | 21 | #include "common/alignment.h" |
| 22 | #include "common/common_funcs.h" | ||
| 22 | #include "common/common_types.h" | 23 | #include "common/common_types.h" |
| 23 | #include "common/math_util.h" | 24 | #include "common/logging/log.h" |
| 24 | #include "core/core.h" | ||
| 25 | #include "core/memory.h" | ||
| 26 | #include "core/settings.h" | ||
| 27 | #include "video_core/compatible_formats.h" | 25 | #include "video_core/compatible_formats.h" |
| 26 | #include "video_core/delayed_destruction_ring.h" | ||
| 28 | #include "video_core/dirty_flags.h" | 27 | #include "video_core/dirty_flags.h" |
| 29 | #include "video_core/engines/fermi_2d.h" | 28 | #include "video_core/engines/fermi_2d.h" |
| 29 | #include "video_core/engines/kepler_compute.h" | ||
| 30 | #include "video_core/engines/maxwell_3d.h" | 30 | #include "video_core/engines/maxwell_3d.h" |
| 31 | #include "video_core/gpu.h" | ||
| 32 | #include "video_core/memory_manager.h" | 31 | #include "video_core/memory_manager.h" |
| 33 | #include "video_core/rasterizer_interface.h" | 32 | #include "video_core/rasterizer_interface.h" |
| 34 | #include "video_core/surface.h" | 33 | #include "video_core/surface.h" |
| 35 | #include "video_core/texture_cache/copy_params.h" | 34 | #include "video_core/texture_cache/descriptor_table.h" |
| 36 | #include "video_core/texture_cache/format_lookup_table.h" | 35 | #include "video_core/texture_cache/format_lookup_table.h" |
| 37 | #include "video_core/texture_cache/surface_base.h" | 36 | #include "video_core/texture_cache/formatter.h" |
| 38 | #include "video_core/texture_cache/surface_params.h" | 37 | #include "video_core/texture_cache/image_base.h" |
| 39 | #include "video_core/texture_cache/surface_view.h" | 38 | #include "video_core/texture_cache/image_info.h" |
| 40 | 39 | #include "video_core/texture_cache/image_view_base.h" | |
| 41 | namespace Tegra::Texture { | 40 | #include "video_core/texture_cache/image_view_info.h" |
| 42 | struct FullTextureInfo; | 41 | #include "video_core/texture_cache/render_targets.h" |
| 43 | } | 42 | #include "video_core/texture_cache/samples_helper.h" |
| 44 | 43 | #include "video_core/texture_cache/slot_vector.h" | |
| 45 | namespace VideoCore { | 44 | #include "video_core/texture_cache/types.h" |
| 46 | class RasterizerInterface; | 45 | #include "video_core/texture_cache/util.h" |
| 47 | } | 46 | #include "video_core/textures/texture.h" |
| 48 | 47 | ||
| 49 | namespace VideoCommon { | 48 | namespace VideoCommon { |
| 50 | 49 | ||
| 51 | using VideoCore::Surface::FormatCompatibility; | 50 | using Tegra::Texture::SwizzleSource; |
| 51 | using Tegra::Texture::TextureType; | ||
| 52 | using Tegra::Texture::TICEntry; | ||
| 53 | using Tegra::Texture::TSCEntry; | ||
| 54 | using VideoCore::Surface::GetFormatType; | ||
| 55 | using VideoCore::Surface::IsCopyCompatible; | ||
| 52 | using VideoCore::Surface::PixelFormat; | 56 | using VideoCore::Surface::PixelFormat; |
| 53 | using VideoCore::Surface::SurfaceTarget; | 57 | using VideoCore::Surface::PixelFormatFromDepthFormat; |
| 54 | using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; | 58 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; |
| 59 | using VideoCore::Surface::SurfaceType; | ||
| 55 | 60 | ||
| 56 | template <typename TSurface, typename TView> | 61 | template <class P> |
| 57 | class TextureCache { | 62 | class TextureCache { |
| 58 | using VectorSurface = boost::container::small_vector<TSurface, 1>; | 63 | /// Address shift for caching images into a hash table |
| 64 | static constexpr u64 PAGE_BITS = 20; | ||
| 65 | |||
| 66 | /// Enables debugging features in the texture cache | ||
| 67 | static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; | ||
| 68 | /// Implement blits as copies between framebuffers | ||
| 69 | static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; | ||
| 70 | /// True when some copies have to be emulated | ||
| 71 | static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; | ||
| 72 | |||
| 73 | /// Image view ID for null descriptors | ||
| 74 | static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; | ||
| 75 | /// Sampler ID for bugged sampler IDs | ||
| 76 | static constexpr SamplerId NULL_SAMPLER_ID{0}; | ||
| 77 | |||
| 78 | using Runtime = typename P::Runtime; | ||
| 79 | using Image = typename P::Image; | ||
| 80 | using ImageAlloc = typename P::ImageAlloc; | ||
| 81 | using ImageView = typename P::ImageView; | ||
| 82 | using Sampler = typename P::Sampler; | ||
| 83 | using Framebuffer = typename P::Framebuffer; | ||
| 84 | |||
| 85 | struct BlitImages { | ||
| 86 | ImageId dst_id; | ||
| 87 | ImageId src_id; | ||
| 88 | PixelFormat dst_format; | ||
| 89 | PixelFormat src_format; | ||
| 90 | }; | ||
| 91 | |||
| 92 | template <typename T> | ||
| 93 | struct IdentityHash { | ||
| 94 | [[nodiscard]] size_t operator()(T value) const noexcept { | ||
| 95 | return static_cast<size_t>(value); | ||
| 96 | } | ||
| 97 | }; | ||
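IdentityHash skips rehashing keys that are already well-distributed integers, such as the 1 MiB page indices derived via PAGE_BITS above. A hypothetical usage sketch (the element type is a placeholder; the cache stores its own ID types):

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    // The u64 page index is already a usable hash value, so IdentityHash
    // avoids a redundant hashing step on every lookup.
    std::unordered_map<std::uint64_t, std::vector<int>, IdentityHash<std::uint64_t>> page_table;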
| 59 | 98 | ||
| 60 | public: | 99 | public: |
| 61 | void InvalidateRegion(VAddr addr, std::size_t size) { | 100 | explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, |
| 62 | std::lock_guard lock{mutex}; | 101 | Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); |
| 63 | 102 | ||
| 64 | for (const auto& surface : GetSurfacesInRegion(addr, size)) { | 103 | /// Notify the cache that a new frame has been queued |
| 65 | Unregister(surface); | 104 | void TickFrame(); |
| 66 | } | ||
| 67 | } | ||
| 68 | 105 | ||
| 69 | void OnCPUWrite(VAddr addr, std::size_t size) { | 106 | /// Return a unique mutually exclusive lock for the cache |
| 70 | std::lock_guard lock{mutex}; | 107 | [[nodiscard]] std::unique_lock<std::mutex> AcquireLock(); |
| 71 | 108 | ||
| 72 | for (const auto& surface : GetSurfacesInRegion(addr, size)) { | 109 | /// Return a constant reference to the given image view id |
| 73 | if (surface->IsMemoryMarked()) { | 110 | [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; |
| 74 | UnmarkMemory(surface); | ||
| 75 | surface->SetSyncPending(true); | ||
| 76 | marked_for_unregister.emplace_back(surface); | ||
| 77 | } | ||
| 78 | } | ||
| 79 | } | ||
| 80 | 111 | ||
| 81 | void SyncGuestHost() { | 112 | /// Return a reference to the given image view id |
| 82 | std::lock_guard lock{mutex}; | 113 | [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; |
| 83 | 114 | ||
| 84 | for (const auto& surface : marked_for_unregister) { | 115 | /// Fill image_view_ids with the graphics images in indices |
| 85 | if (surface->IsRegistered()) { | 116 | void FillGraphicsImageViews(std::span<const u32> indices, |
| 86 | surface->SetSyncPending(false); | 117 | std::span<ImageViewId> image_view_ids); |
| 87 | Unregister(surface); | ||
| 88 | } | ||
| 89 | } | ||
| 90 | marked_for_unregister.clear(); | ||
| 91 | } | ||
| 92 | 118 | ||
| 93 | /** | 119 | /// Fill image_view_ids with the compute images in indices |
| 94 | * Guarantees that render targets don't unregister themselves if they | 120 | void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); |
| 95 | * collide. Protection is currently only done on 3D slices. | ||
| 96 | */ | ||
| 97 | void GuardRenderTargets(bool new_guard) { | ||
| 98 | guard_render_targets = new_guard; | ||
| 99 | } | ||
| 100 | 121 | ||
| 101 | void GuardSamplers(bool new_guard) { | 122 | /// Get the sampler from the graphics descriptor table in the specified index |
| 102 | guard_samplers = new_guard; | 123 | Sampler* GetGraphicsSampler(u32 index); |
| 103 | } | ||
| 104 | 124 | ||
| 105 | void FlushRegion(VAddr addr, std::size_t size) { | 125 | /// Get the sampler from the compute descriptor table in the specified index |
| 106 | std::lock_guard lock{mutex}; | 126 | Sampler* GetComputeSampler(u32 index); |
| 107 | 127 | ||
| 108 | auto surfaces = GetSurfacesInRegion(addr, size); | 128 | /// Refresh the state for graphics image view and sampler descriptors |
| 109 | if (surfaces.empty()) { | 129 | void SynchronizeGraphicsDescriptors(); |
| 110 | return; | ||
| 111 | } | ||
| 112 | std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) { | ||
| 113 | return a->GetModificationTick() < b->GetModificationTick(); | ||
| 114 | }); | ||
| 115 | for (const auto& surface : surfaces) { | ||
| 116 | mutex.unlock(); | ||
| 117 | FlushSurface(surface); | ||
| 118 | mutex.lock(); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | 130 | ||
| 122 | bool MustFlushRegion(VAddr addr, std::size_t size) { | 131 | /// Refresh the state for compute image view and sampler descriptors |
| 123 | std::lock_guard lock{mutex}; | 132 | void SynchronizeComputeDescriptors(); |
| 124 | 133 | ||
| 125 | const auto surfaces = GetSurfacesInRegion(addr, size); | 134 | /// Update bound render targets and upload memory if necessary |
| 126 | return std::any_of(surfaces.cbegin(), surfaces.cend(), | 135 | /// @param is_clear True when the render targets are being used for clears |
| 127 | [](const TSurface& surface) { return surface->IsModified(); }); | 136 | void UpdateRenderTargets(bool is_clear); |
| 128 | } | ||
| 129 | 137 | ||
| 130 | TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, | 138 | /// Find a framebuffer with the currently bound render targets |
| 131 | const VideoCommon::Shader::Sampler& entry) { | 139 | /// UpdateRenderTargets should be called before this |
| 132 | std::lock_guard lock{mutex}; | 140 | Framebuffer* GetFramebuffer(); |
| 133 | const auto gpu_addr{tic.Address()}; | ||
| 134 | if (!gpu_addr) { | ||
| 135 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 136 | } | ||
| 137 | 141 | ||
| 138 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 142 | /// Mark images in a range as modified from the CPU |
| 139 | if (!cpu_addr) { | 143 | void WriteMemory(VAddr cpu_addr, size_t size); |
| 140 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 141 | } | ||
| 142 | 144 | ||
| 143 | if (!IsTypeCompatible(tic.texture_type, entry)) { | 145 | /// Download contents of host images to guest memory in a region |
| 144 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 146 | void DownloadMemory(VAddr cpu_addr, size_t size); |
| 145 | } | ||
| 146 | 147 | ||
| 147 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; | 148 | /// Remove images in a region |
| 148 | const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); | 149 | void UnmapMemory(VAddr cpu_addr, size_t size); |
| 149 | if (guard_samplers) { | ||
| 150 | sampled_textures.push_back(surface); | ||
| 151 | } | ||
| 152 | return view; | ||
| 153 | } | ||
| 154 | 150 | ||
| 155 | TView GetImageSurface(const Tegra::Texture::TICEntry& tic, | 151 | /// Blit an image with the given parameters |
| 156 | const VideoCommon::Shader::Image& entry) { | 152 | void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 157 | std::lock_guard lock{mutex}; | 153 | const Tegra::Engines::Fermi2D::Surface& src, |
| 158 | const auto gpu_addr{tic.Address()}; | 154 | const Tegra::Engines::Fermi2D::Config& copy); |
| 159 | if (!gpu_addr) { | ||
| 160 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 161 | } | ||
| 162 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 163 | if (!cpu_addr) { | ||
| 164 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 165 | } | ||
| 166 | const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; | ||
| 167 | const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); | ||
| 168 | if (guard_samplers) { | ||
| 169 | sampled_textures.push_back(surface); | ||
| 170 | } | ||
| 171 | return view; | ||
| 172 | } | ||
| 173 | 155 | ||
| 174 | bool TextureBarrier() { | 156 | /// Invalidate the contents of the color buffer index |
| 175 | const bool any_rt = | 157 | /// These contents become unspecified, the cache can assume aggressive optimizations. |
| 176 | std::any_of(sampled_textures.begin(), sampled_textures.end(), | 158 | void InvalidateColorBuffer(size_t index); |
| 177 | [](const auto& surface) { return surface->IsRenderTarget(); }); | ||
| 178 | sampled_textures.clear(); | ||
| 179 | return any_rt; | ||
| 180 | } | ||
| 181 | 159 | ||
| 182 | TView GetDepthBufferSurface(bool preserve_contents) { | 160 | /// Invalidate the contents of the depth buffer |
| 183 | std::lock_guard lock{mutex}; | 161 | /// These contents become unspecified, the cache can assume aggressive optimizations. |
| 184 | auto& dirty = maxwell3d.dirty; | 162 | void InvalidateDepthBuffer(); |
| 185 | if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { | ||
| 186 | return depth_buffer.view; | ||
| 187 | } | ||
| 188 | dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; | ||
| 189 | 163 | ||
| 190 | const auto& regs{maxwell3d.regs}; | 164 | /// Try to find a cached image view in the given CPU address |
| 191 | const auto gpu_addr{regs.zeta.Address()}; | 165 | [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); |
| 192 | if (!gpu_addr || !regs.zeta_enable) { | ||
| 193 | SetEmptyDepthBuffer(); | ||
| 194 | return {}; | ||
| 195 | } | ||
| 196 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 197 | if (!cpu_addr) { | ||
| 198 | SetEmptyDepthBuffer(); | ||
| 199 | return {}; | ||
| 200 | } | ||
| 201 | const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)}; | ||
| 202 | auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); | ||
| 203 | if (depth_buffer.target) | ||
| 204 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); | ||
| 205 | depth_buffer.target = surface_view.first; | ||
| 206 | depth_buffer.view = surface_view.second; | ||
| 207 | if (depth_buffer.target) | ||
| 208 | depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); | ||
| 209 | return surface_view.second; | ||
| 210 | } | ||
| 211 | |||
| 212 | TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { | ||
| 213 | std::lock_guard lock{mutex}; | ||
| 214 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | ||
| 215 | if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) { | ||
| 216 | return render_targets[index].view; | ||
| 217 | } | ||
| 218 | maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false; | ||
| 219 | 166 | ||
| 220 | const auto& regs{maxwell3d.regs}; | 167 | /// Return true when there are uncommitted images to be downloaded |
| 221 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || | 168 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; |
| 222 | regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { | ||
| 223 | SetEmptyColorBuffer(index); | ||
| 224 | return {}; | ||
| 225 | } | ||
| 226 | 169 | ||
| 227 | const auto& config{regs.rt[index]}; | 170 | /// Return true when the caller should wait for async downloads |
| 228 | const auto gpu_addr{config.Address()}; | 171 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; |
| 229 | if (!gpu_addr) { | ||
| 230 | SetEmptyColorBuffer(index); | ||
| 231 | return {}; | ||
| 232 | } | ||
| 233 | 172 | ||
| 234 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 173 | /// Commit asynchronous downloads |
| 235 | if (!cpu_addr) { | 174 | void CommitAsyncFlushes(); |
| 236 | SetEmptyColorBuffer(index); | 175 | |
| 237 | return {}; | 176 | /// Pop asynchronous downloads |
| 238 | } | 177 | void PopAsyncFlushes(); |
| 178 | |||
| 179 | /// Return true when a CPU region is modified from the GPU | ||
| 180 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||
| 239 | 181 | ||
| 240 | auto surface_view = | 182 | private: |
| 241 | GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index), | 183 | /// Iterate over all page indices in a range |
| 242 | preserve_contents, true); | 184 | template <typename Func> |
| 243 | if (render_targets[index].target) { | 185 | static void ForEachPage(VAddr addr, size_t size, Func&& func) { |
| 244 | auto& surface = render_targets[index].target; | 186 | static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>; |
| 245 | surface->MarkAsRenderTarget(false, NO_RT); | 187 | const u64 page_end = (addr + size - 1) >> PAGE_BITS; |
| 246 | const auto& cr_params = surface->GetSurfaceParams(); | 188 | for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { |
| 247 | if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) { | 189 | if constexpr (RETURNS_BOOL) { |
| 248 | AsyncFlushSurface(surface); | 190 | if (func(page)) { |
| 191 | break; | ||
| 192 | } | ||
| 193 | } else { | ||
| 194 | func(page); | ||
| 249 | } | 195 | } |
| 250 | } | 196 | } |
| 251 | render_targets[index].target = surface_view.first; | ||
| 252 | render_targets[index].view = surface_view.second; | ||
| 253 | if (render_targets[index].target) | ||
| 254 | render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index)); | ||
| 255 | return surface_view.second; | ||
| 256 | } | 197 | } |
| 257 | 198 | ||
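ForEachPage on the right-hand side walks every hash-table page touched by an address range; with PAGE_BITS = 20 each page spans 1 MiB. A minimal standalone sketch of the same contract, leaving out the early-exit branch for bool-returning callbacks:

    #include <cstddef>
    #include <cstdint>

    constexpr std::uint64_t PAGE_BITS = 20; // 1 MiB pages

    template <typename Func>
    void ForEachPageSketch(std::uint64_t addr, std::size_t size, Func&& func) {
        const std::uint64_t page_end = (addr + size - 1) >> PAGE_BITS;
        for (std::uint64_t page = addr >> PAGE_BITS; page <= page_end; ++page) {
            func(page);
        }
    }

    // Example: addr = 0x100000 with size = 0x300000 visits pages 1, 2 and 3.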
| 258 | void MarkColorBufferInUse(std::size_t index) { | 199 | /// Fills image_view_ids in the image views in indices |
| 259 | if (auto& render_target = render_targets[index].target) { | 200 | void FillImageViews(DescriptorTable<TICEntry>& table, |
| 260 | render_target->MarkAsModified(true, Tick()); | 201 | std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, |
| 261 | } | 202 | std::span<ImageViewId> image_view_ids); |
| 262 | } | ||
| 263 | 203 | ||
| 264 | void MarkDepthBufferInUse() { | 204 | /// Find or create an image view in the guest descriptor table |
| 265 | if (depth_buffer.target) { | 205 | ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, |
| 266 | depth_buffer.target->MarkAsModified(true, Tick()); | 206 | std::span<ImageViewId> cached_image_view_ids, u32 index); |
| 267 | } | ||
| 268 | } | ||
| 269 | 207 | ||
| 270 | void SetEmptyDepthBuffer() { | 208 | /// Find or create a framebuffer with the given render target parameters |
| 271 | if (depth_buffer.target == nullptr) { | 209 | FramebufferId GetFramebufferId(const RenderTargets& key); |
| 272 | return; | ||
| 273 | } | ||
| 274 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); | ||
| 275 | depth_buffer.target = nullptr; | ||
| 276 | depth_buffer.view = nullptr; | ||
| 277 | } | ||
| 278 | 210 | ||
| 279 | void SetEmptyColorBuffer(std::size_t index) { | 211 | /// Refresh the contents (pixel data) of an image |
| 280 | if (render_targets[index].target == nullptr) { | 212 | void RefreshContents(Image& image); |
| 281 | return; | ||
| 282 | } | ||
| 283 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); | ||
| 284 | render_targets[index].target = nullptr; | ||
| 285 | render_targets[index].view = nullptr; | ||
| 286 | } | ||
| 287 | |||
| 288 | void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, | ||
| 289 | const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, | ||
| 290 | const Tegra::Engines::Fermi2D::Config& copy_config) { | ||
| 291 | std::lock_guard lock{mutex}; | ||
| 292 | SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config); | ||
| 293 | SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); | ||
| 294 | const GPUVAddr src_gpu_addr = src_config.Address(); | ||
| 295 | const GPUVAddr dst_gpu_addr = dst_config.Address(); | ||
| 296 | DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); | ||
| 297 | |||
| 298 | const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr); | ||
| 299 | const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr); | ||
| 300 | std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); | ||
| 301 | TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; | ||
| 302 | ImageBlit(src_surface, dst_surface.second, copy_config); | ||
| 303 | dst_surface.first->MarkAsModified(true, Tick()); | ||
| 304 | } | ||
| 305 | |||
| 306 | TSurface TryFindFramebufferSurface(VAddr addr) const { | ||
| 307 | if (!addr) { | ||
| 308 | return nullptr; | ||
| 309 | } | ||
| 310 | const VAddr page = addr >> registry_page_bits; | ||
| 311 | const auto it = registry.find(page); | ||
| 312 | if (it == registry.end()) { | ||
| 313 | return nullptr; | ||
| 314 | } | ||
| 315 | const auto& list = it->second; | ||
| 316 | const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { | ||
| 317 | return surface->GetCpuAddr() == addr; | ||
| 318 | }); | ||
| 319 | return found != list.end() ? *found : nullptr; | ||
| 320 | } | ||
| 321 | 213 | ||
| 322 | u64 Tick() { | 214 | /// Upload data from guest to an image |
| 323 | return ++ticks; | 215 | template <typename MapBuffer> |
| 324 | } | 216 | void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset); |
| 325 | 217 | ||
| 326 | void CommitAsyncFlushes() { | 218 | /// Find or create an image view from a guest descriptor |
| 327 | committed_flushes.push_back(uncommitted_flushes); | 219 | [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); |
| 328 | uncommitted_flushes.reset(); | ||
| 329 | } | ||
| 330 | 220 | ||
| 331 | bool HasUncommittedFlushes() const { | 221 | /// Create a new image view from a guest descriptor |
| 332 | return uncommitted_flushes != nullptr; | 222 | [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); |
| 333 | } | ||
| 334 | 223 | ||
| 335 | bool ShouldWaitAsyncFlushes() const { | 224 | /// Find or create an image from the given parameters |
| 336 | return !committed_flushes.empty() && committed_flushes.front() != nullptr; | 225 | [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 337 | } | 226 | RelaxedOptions options = RelaxedOptions{}); |
| 338 | 227 | ||
| 339 | void PopAsyncFlushes() { | 228 | /// Find an image from the given parameters |
| 340 | if (committed_flushes.empty()) { | 229 | [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 341 | return; | 230 | RelaxedOptions options); |
| 342 | } | ||
| 343 | auto& flush_list = committed_flushes.front(); | ||
| 344 | if (!flush_list) { | ||
| 345 | committed_flushes.pop_front(); | ||
| 346 | return; | ||
| 347 | } | ||
| 348 | for (TSurface& surface : *flush_list) { | ||
| 349 | FlushSurface(surface); | ||
| 350 | } | ||
| 351 | committed_flushes.pop_front(); | ||
| 352 | } | ||
| 353 | 231 | ||
| 354 | protected: | 232 | /// Create an image from the given parameters |
| 355 | explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_, | 233 | [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 356 | Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, | 234 | RelaxedOptions options); |
| 357 | bool is_astc_supported_) | ||
| 358 | : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, | ||
| 359 | gpu_memory{gpu_memory_} { | ||
| 360 | for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | ||
| 361 | SetEmptyColorBuffer(i); | ||
| 362 | } | ||
| 363 | 235 | ||
| 364 | SetEmptyDepthBuffer(); | 236 | /// Create a new image and join perfectly matching existing images |
| 365 | staging_cache.SetSize(2); | 237 | /// Remove joined images from the cache |
| 238 | [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | ||
| 366 | 239 | ||
| 367 | const auto make_siblings = [this](PixelFormat a, PixelFormat b) { | 240 | /// Return a blit image pair from the given guest blit parameters |
| 368 | siblings_table[static_cast<std::size_t>(a)] = b; | 241 | [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, |
| 369 | siblings_table[static_cast<std::size_t>(b)] = a; | 242 | const Tegra::Engines::Fermi2D::Surface& src); |
| 370 | }; | ||
| 371 | std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); | ||
| 372 | make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM); | ||
| 373 | make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT); | ||
| 374 | make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT); | ||
| 375 | 243 | ||
| 376 | sampled_textures.reserve(64); | 244 | /// Find or create a sampler from a guest descriptor sampler |
| 377 | } | 245 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); |
| 378 | 246 | ||
| 379 | ~TextureCache() = default; | 247 | /// Find or create an image view for the given color buffer index |
| 248 | [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); | ||
| 380 | 249 | ||
| 381 | virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; | 250 | /// Find or create an image view for the depth buffer |
| 251 | [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); | ||
| 382 | 252 | ||
| 383 | virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, | 253 | /// Find or create a view for a render target with the given image parameters |
| 384 | const CopyParams& copy_params) = 0; | 254 | [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, |
| 255 | bool is_clear); | ||
| 385 | 256 | ||
| 386 | virtual void ImageBlit(TView& src_view, TView& dst_view, | 257 | /// Iterates over all the images in a region calling func |
| 387 | const Tegra::Engines::Fermi2D::Config& copy_config) = 0; | 258 | template <typename Func> |
| 259 | void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); | ||
| 388 | 260 | ||
| 389 | // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture | 261 | /// Find or create an image view in the given image with the passed parameters |
| 390 | // and reading it from a separate buffer. | 262 | [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); |
| 391 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; | ||
| 392 | 263 | ||
| 393 | void ManageRenderTargetUnregister(TSurface& surface) { | 264 | /// Register image in the page table |
| 394 | auto& dirty = maxwell3d.dirty; | 265 | void RegisterImage(ImageId image); |
| 395 | const u32 index = surface->GetRenderTarget(); | 266 | |
| 396 | if (index == DEPTH_RT) { | 267 | /// Unregister image from the page table |
| 397 | dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; | 268 | void UnregisterImage(ImageId image); |
| 398 | } else { | 269 | |
| 399 | dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true; | 270 | /// Track CPU reads and writes for image |
| 400 | } | 271 | void TrackImage(ImageBase& image); |
| 401 | dirty.flags[VideoCommon::Dirty::RenderTargets] = true; | 272 | |
| 273 | /// Stop tracking CPU reads and writes for image | ||
| 274 | void UntrackImage(ImageBase& image); | ||
| 275 | |||
| 276 | /// Delete image from the cache | ||
| 277 | void DeleteImage(ImageId image); | ||
| 278 | |||
| 279 | /// Remove image views references from the cache | ||
| 280 | void RemoveImageViewReferences(std::span<const ImageViewId> removed_views); | ||
| 281 | |||
| 282 | /// Remove framebuffers using the given image views from the cache | ||
| 283 | void RemoveFramebuffers(std::span<const ImageViewId> removed_views); | ||
| 284 | |||
| 285 | /// Mark an image as modified from the GPU | ||
| 286 | void MarkModification(ImageBase& image) noexcept; | ||
| 287 | |||
| 288 | /// Synchronize image aliases, copying data if needed | ||
| 289 | void SynchronizeAliases(ImageId image_id); | ||
| 290 | |||
| 291 | /// Prepare an image to be used | ||
| 292 | void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); | ||
| 293 | |||
| 294 | /// Prepare an image view to be used | ||
| 295 | void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); | ||
| 296 | |||
| 297 | /// Execute copies from one image to the other, even if they are incompatible | ||
| 298 | void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies); | ||
| 299 | |||
| 300 | /// Bind an image view as render target, downloading resources preemptively if needed | ||
| 301 | void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); | ||
| 302 | |||
| 303 | /// Create a render target from a given image and image view parameters | ||
| 304 | [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage( | ||
| 305 | ImageId, const ImageViewInfo& view_info); | ||
| 306 | |||
| 307 | /// Returns true if the current clear parameters clear the whole image of a given image view | ||
| 308 | [[nodiscard]] bool IsFullClear(ImageViewId id); | ||
| 309 | |||
| 310 | Runtime& runtime; | ||
| 311 | VideoCore::RasterizerInterface& rasterizer; | ||
| 312 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 313 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 314 | Tegra::MemoryManager& gpu_memory; | ||
| 315 | |||
| 316 | DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; | ||
| 317 | DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; | ||
| 318 | std::vector<SamplerId> graphics_sampler_ids; | ||
| 319 | std::vector<ImageViewId> graphics_image_view_ids; | ||
| 320 | |||
| 321 | DescriptorTable<TICEntry> compute_image_table{gpu_memory}; | ||
| 322 | DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory}; | ||
| 323 | std::vector<SamplerId> compute_sampler_ids; | ||
| 324 | std::vector<ImageViewId> compute_image_view_ids; | ||
| 325 | |||
| 326 | RenderTargets render_targets; | ||
| 327 | |||
| 328 | std::mutex mutex; | ||
| 329 | |||
| 330 | std::unordered_map<TICEntry, ImageViewId> image_views; | ||
| 331 | std::unordered_map<TSCEntry, SamplerId> samplers; | ||
| 332 | std::unordered_map<RenderTargets, FramebufferId> framebuffers; | ||
| 333 | |||
| 334 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; | ||
| 335 | |||
| 336 | bool has_deleted_images = false; | ||
| 337 | |||
| 338 | SlotVector<Image> slot_images; | ||
| 339 | SlotVector<ImageView> slot_image_views; | ||
| 340 | SlotVector<ImageAlloc> slot_image_allocs; | ||
| 341 | SlotVector<Sampler> slot_samplers; | ||
| 342 | SlotVector<Framebuffer> slot_framebuffers; | ||
| 343 | |||
| 344 | // TODO: This data structure is not optimal and it should be reworked | ||
| 345 | std::vector<ImageId> uncommitted_downloads; | ||
| 346 | std::queue<std::vector<ImageId>> committed_downloads; | ||
| 347 | |||
| 348 | static constexpr size_t TICKS_TO_DESTROY = 6; | ||
| 349 | DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; | ||
| 350 | DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; | ||
| 351 | DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers; | ||
| 352 | |||
| 353 | std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; | ||
| 354 | |||
| 355 | u64 modification_tick = 0; | ||
| 356 | u64 frame_tick = 0; | ||
| 357 | }; | ||
| 358 | |||
| 359 | template <class P> | ||
| 360 | TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, | ||
| 361 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 362 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 363 | Tegra::MemoryManager& gpu_memory_) | ||
| 364 | : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, | ||
| 365 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { | ||
| 366 | // Configure null sampler | ||
| 367 | TSCEntry sampler_descriptor{}; | ||
| 368 | sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); | ||
| 369 | sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); | ||
| 370 | sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); | ||
| 371 | sampler_descriptor.cubemap_anisotropy.Assign(1); | ||
| 372 | |||
| 373 | // Make sure the first index is reserved for the null resources | ||
| 374 | // This way the null resource becomes a compile-time constant | ||
| 375 | void(slot_image_views.insert(runtime, NullImageParams{})); | ||
| 376 | void(slot_samplers.insert(runtime, sampler_descriptor)); | ||
| 377 | } | ||
| 378 | |||
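The constructor above relies on insertion order: the null image view and null sampler are inserted before anything else, so their ids are always zero. A minimal sketch of why that makes the null id a compile-time constant, assuming a SlotVector-like container whose ids are insertion indices (SimpleSlotVector, Id and NULL_ID here are illustrative, not the project's types):

// Sketch: because the null resources are inserted first, they always occupy
// slot 0, so a "null id" can be a constant instead of a runtime lookup.
#include <cstddef>
#include <utility>
#include <vector>

struct Id {
    std::size_t index = 0;
    constexpr explicit operator bool() const noexcept { return index != 0; } // id 0 acts as null
};

template <typename T>
class SimpleSlotVector {
public:
    template <typename... Args>
    Id insert(Args&&... args) {
        storage.emplace_back(std::forward<Args>(args)...);
        return Id{storage.size() - 1};
    }
    T& operator[](Id id) { return storage[id.index]; }

private:
    std::vector<T> storage;
};

inline constexpr Id NULL_ID{0}; // valid as long as slot 0 is reserved first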
| 379 | template <class P> | ||
| 380 | void TextureCache<P>::TickFrame() { | ||
| 381 | // Tick sentenced resources in this order to ensure they are destroyed in the right order | ||
| 382 | sentenced_images.Tick(); | ||
| 383 | sentenced_framebuffers.Tick(); | ||
| 384 | sentenced_image_view.Tick(); | ||
| 385 | ++frame_tick; | ||
| 386 | } | ||
| 387 | |||
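TickFrame advances the delayed-destruction rings declared in the class body. The ring type itself is defined elsewhere; a minimal sketch of the idea, assuming only the Push/Tick interface used here (the layout is an assumption, not the project's implementation):

// Sketch of a delayed-destruction ring: objects survive TICKS frames before
// being destroyed, giving in-flight GPU work time to finish using them.
#include <array>
#include <cstddef>
#include <vector>

template <typename T, std::size_t TICKS>
class DelayedDestructionRingSketch {
public:
    void Push(T&& object) {
        elements[index].push_back(std::move(object));
    }
    void Tick() {
        index = (index + 1) % TICKS;
        elements[index].clear(); // destroys objects pushed TICKS frames ago
    }

private:
    std::size_t index = 0;
    std::array<std::vector<T>, TICKS> elements;
};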
| 388 | template <class P> | ||
| 389 | std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() { | ||
| 390 | return std::unique_lock{mutex}; | ||
| 391 | } | ||
| 392 | |||
| 393 | template <class P> | ||
| 394 | const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { | ||
| 395 | return slot_image_views[id]; | ||
| 396 | } | ||
| 397 | |||
| 398 | template <class P> | ||
| 399 | typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { | ||
| 400 | return slot_image_views[id]; | ||
| 401 | } | ||
| 402 | |||
| 403 | template <class P> | ||
| 404 | void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, | ||
| 405 | std::span<ImageViewId> image_view_ids) { | ||
| 406 | FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); | ||
| 407 | } | ||
| 408 | |||
| 409 | template <class P> | ||
| 410 | void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, | ||
| 411 | std::span<ImageViewId> image_view_ids) { | ||
| 412 | FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); | ||
| 413 | } | ||
| 414 | |||
| 415 | template <class P> | ||
| 416 | typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { | ||
| 417 | [[unlikely]] if (index > graphics_sampler_table.Limit()) { | ||
| 418 | LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); | ||
| 419 | return &slot_samplers[NULL_SAMPLER_ID]; | ||
| 420 | } | ||
| 421 | const auto [descriptor, is_new] = graphics_sampler_table.Read(index); | ||
| 422 | SamplerId& id = graphics_sampler_ids[index]; | ||
| 423 | [[unlikely]] if (is_new) { | ||
| 424 | id = FindSampler(descriptor); | ||
| 402 | } | 425 | } |
| 426 | return &slot_samplers[id]; | ||
| 427 | } | ||
| 403 | 428 | ||
| 404 | void Register(TSurface surface) { | 429 | template <class P> |
| 405 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | 430 | typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { |
| 406 | const std::size_t size = surface->GetSizeInBytes(); | 431 | [[unlikely]] if (index > compute_sampler_table.Limit()) { |
| 407 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 432 | LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); |
| 408 | if (!cpu_addr) { | 433 | return &slot_samplers[NULL_SAMPLER_ID]; |
| 409 | LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", | 434 | } |
| 410 | gpu_addr); | 435 | const auto [descriptor, is_new] = compute_sampler_table.Read(index); |
| 411 | return; | 436 | SamplerId& id = compute_sampler_ids[index]; |
| 412 | } | 437 | [[unlikely]] if (is_new) { |
| 413 | surface->SetCpuAddr(*cpu_addr); | 438 | id = FindSampler(descriptor); |
| 414 | RegisterInnerCache(surface); | ||
| 415 | surface->MarkAsRegistered(true); | ||
| 416 | surface->SetMemoryMarked(true); | ||
| 417 | rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); | ||
| 418 | } | 439 | } |
| 440 | return &slot_samplers[id]; | ||
| 441 | } | ||
| 419 | 442 | ||
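Both sampler getters follow the same read-and-cache pattern: DescriptorTable::Read() reports whether the raw descriptor changed since the last read, and only then is the comparatively expensive FindSampler lookup repeated. A sketch of what such a Read() could look like, assuming it diffs against a shadow copy of guest memory (DescriptorTableSketch and Descriptor are illustrative stand-ins):

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Hypothetical raw descriptor standing in for TSCEntry/TICEntry.
struct Descriptor {
    std::uint64_t raw = 0;
    bool operator==(const Descriptor&) const = default;
};

class DescriptorTableSketch {
public:
    // Returns the descriptor plus whether it changed since the previous read.
    std::pair<Descriptor, bool> Read(std::size_t index, const Descriptor* guest_memory) {
        if (shadow.size() <= index) {
            shadow.resize(index + 1);
            fresh.resize(index + 1, 1); // never-read entries always count as new
        }
        const Descriptor current = guest_memory[index];
        const bool is_new = fresh[index] != 0 || !(shadow[index] == current);
        shadow[index] = current;
        fresh[index] = 0;
        return {current, is_new};
    }

private:
    std::vector<Descriptor> shadow;
    std::vector<char> fresh;
};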
| 420 | void UnmarkMemory(TSurface surface) { | 443 | template <class P> |
| 421 | if (!surface->IsMemoryMarked()) { | 444 | void TextureCache<P>::SynchronizeGraphicsDescriptors() { |
| 422 | return; | 445 | using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; |
| 423 | } | 446 | const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; |
| 424 | const std::size_t size = surface->GetSizeInBytes(); | 447 | const u32 tic_limit = maxwell3d.regs.tic.limit; |
| 425 | const VAddr cpu_addr = surface->GetCpuAddr(); | 448 | const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; |
| 426 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | 449 | if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { |
| 427 | surface->SetMemoryMarked(false); | 450 | graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); |
| 428 | } | 451 | } |
| 452 | if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { | ||
| 453 | graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | ||
| 454 | } | ||
| 455 | } | ||
| 429 | 456 | ||
| 430 | void Unregister(TSurface surface) { | 457 | template <class P> |
| 431 | if (guard_render_targets && surface->IsProtected()) { | 458 | void TextureCache<P>::SynchronizeComputeDescriptors() { |
| 432 | return; | 459 | const bool linked_tsc = kepler_compute.launch_description.linked_tsc; |
| 433 | } | 460 | const u32 tic_limit = kepler_compute.regs.tic.limit; |
| 434 | if (!guard_render_targets && surface->IsRenderTarget()) { | 461 | const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; |
| 435 | ManageRenderTargetUnregister(surface); | 462 | const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); |
| 436 | } | 463 | if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { |
| 437 | UnmarkMemory(surface); | 464 | compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); |
| 438 | if (surface->IsSyncPending()) { | ||
| 439 | marked_for_unregister.remove(surface); | ||
| 440 | surface->SetSyncPending(false); | ||
| 441 | } | ||
| 442 | UnregisterInnerCache(surface); | ||
| 443 | surface->MarkAsRegistered(false); | ||
| 444 | ReserveSurface(surface->GetSurfaceParams(), surface); | ||
| 445 | } | 465 | } |
| 466 | if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { | ||
| 467 | compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | ||
| 468 | } | ||
| 469 | } | ||
| 446 | 470 | ||
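The only difference between the graphics and compute synchronizers is where the linked_tsc bit comes from; the sizing rule is the same in both. Restated as a tiny helper (a restatement of the code above, not a new API):

#include <cstdint>

// When samplers are addressed "via header index" (linked TSC), sampler indices
// come from the texture headers, so the sampler table must cover as many
// entries as the texture header table.
constexpr std::uint32_t EffectiveTscLimit(bool linked_tsc, std::uint32_t tic_limit,
                                          std::uint32_t tsc_limit) {
    return linked_tsc ? tic_limit : tsc_limit;
}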
| 447 | TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { | 471 | template <class P> |
| 448 | if (const auto surface = TryGetReservedSurface(params); surface) { | 472 | void TextureCache<P>::UpdateRenderTargets(bool is_clear) { |
| 449 | surface->SetGpuAddr(gpu_addr); | 473 | using namespace VideoCommon::Dirty; |
| 450 | return surface; | 474 | auto& flags = maxwell3d.dirty.flags; |
| 451 | } | 475 | if (!flags[Dirty::RenderTargets]) { |
| 452 | // No reserved surface available, create a new one and reserve it | 476 | return; |
| 453 | auto new_surface{CreateSurface(gpu_addr, params)}; | ||
| 454 | return new_surface; | ||
| 455 | } | 477 | } |
| 478 | flags[Dirty::RenderTargets] = false; | ||
| 456 | 479 | ||
| 457 | const bool is_astc_supported; | 480 | // Render target control is used on all render targets, so force lookups when it is dirty |
| 481 | const bool force = flags[Dirty::RenderTargetControl]; | ||
| 482 | flags[Dirty::RenderTargetControl] = false; | ||
| 458 | 483 | ||
| 459 | private: | 484 | for (size_t index = 0; index < NUM_RT; ++index) { |
| 460 | enum class RecycleStrategy : u32 { | 485 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; |
| 461 | Ignore = 0, | 486 | if (flags[Dirty::ColorBuffer0 + index] || force) { |
| 462 | Flush = 1, | 487 | flags[Dirty::ColorBuffer0 + index] = false; |
| 463 | BufferCopy = 3, | 488 | BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); |
| 464 | }; | 489 | } |
| 490 | PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); | ||
| 491 | } | ||
| 492 | if (flags[Dirty::ZetaBuffer] || force) { | ||
| 493 | flags[Dirty::ZetaBuffer] = false; | ||
| 494 | BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); | ||
| 495 | } | ||
| 496 | const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; | ||
| 497 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); | ||
| 465 | 498 | ||
| 466 | enum class DeductionType : u32 { | 499 | for (size_t index = 0; index < NUM_RT; ++index) { |
| 467 | DeductionComplete, | 500 | render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); |
| 468 | DeductionIncomplete, | 501 | } |
| 469 | DeductionFailed, | 502 | render_targets.size = Extent2D{ |
| 503 | maxwell3d.regs.render_area.width, | ||
| 504 | maxwell3d.regs.render_area.height, | ||
| 470 | }; | 505 | }; |
| 506 | } | ||
| 471 | 507 | ||
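UpdateRenderTargets is driven by the dirty-flag protocol: register writes set flags, and this consumer clears them once it has re-queried the state. A reduced sketch of the two halves of that protocol (DirtyState and the free functions are illustrative; the flag names mirror the ones used above):

#include <bitset>
#include <cstddef>

enum DirtyFlag : std::size_t {
    RenderTargets = 0,
    RenderTargetControl = 1,
    ColorBuffer0 = 2,
    MaxFlags = 64,
};

struct DirtyState {
    std::bitset<MaxFlags> flags;
};

// Producer side: a register write marks the cached binding stale.
void OnColorBufferRegisterWrite(DirtyState& dirty) {
    dirty.flags[ColorBuffer0] = true;
    dirty.flags[RenderTargets] = true;
}

// Consumer side: rebind only when stale, then acknowledge by clearing the flag.
template <typename Rebind>
void ConsumeColorBuffer0(DirtyState& dirty, Rebind&& rebind) {
    if (!dirty.flags[ColorBuffer0]) {
        return; // fast path: cached binding still valid
    }
    dirty.flags[ColorBuffer0] = false;
    rebind();
}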
| 472 | struct Deduction { | 508 | template <class P> |
| 473 | DeductionType type{DeductionType::DeductionFailed}; | 509 | typename P::Framebuffer* TextureCache<P>::GetFramebuffer() { |
| 474 | TSurface surface{}; | 510 | return &slot_framebuffers[GetFramebufferId(render_targets)]; |
| 511 | } | ||
| 475 | 512 | ||
| 476 | bool Failed() const { | 513 | template <class P> |
| 477 | return type == DeductionType::DeductionFailed; | 514 | void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, |
| 478 | } | 515 | std::span<ImageViewId> cached_image_view_ids, |
| 516 | std::span<const u32> indices, | ||
| 517 | std::span<ImageViewId> image_view_ids) { | ||
| 518 | ASSERT(indices.size() <= image_view_ids.size()); | ||
| 519 | do { | ||
| 520 | has_deleted_images = false; | ||
| 521 | std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { | ||
| 522 | return VisitImageView(table, cached_image_view_ids, index); | ||
| 523 | }); | ||
| 524 | } while (has_deleted_images); | ||
| 525 | } | ||
| 479 | 526 | ||
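The do/while in FillImageViews exists because visiting a descriptor can delete images (invalidating ids already written earlier in the same pass), so the whole pass restarts until it completes without deletions. A stripped-down sketch of that retry-until-stable shape (FillStable, the flag and the int ids are illustrative):

#include <algorithm>
#include <cstdint>
#include <span>

inline bool has_deleted_entries = false; // set by the visitor on any deletion

template <typename Visit>
void FillStable(std::span<const std::uint32_t> indices, std::span<int> out, Visit&& visit) {
    do {
        has_deleted_entries = false;
        // visit may delete cached entries and set the flag above
        std::ranges::transform(indices, out.begin(), visit);
    } while (has_deleted_entries); // a pass with deletions may have written stale ids
}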
| 480 | bool Incomplete() const { | 527 | template <class P> |
| 481 | return type == DeductionType::DeductionIncomplete; | 528 | ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, |
| 482 | } | 529 | std::span<ImageViewId> cached_image_view_ids, |
| 530 | u32 index) { | ||
| 531 | if (index > table.Limit()) { | ||
| 532 | LOG_ERROR(HW_GPU, "Invalid image view index={}", index); | ||
| 533 | return NULL_IMAGE_VIEW_ID; | ||
| 534 | } | ||
| 535 | const auto [descriptor, is_new] = table.Read(index); | ||
| 536 | ImageViewId& image_view_id = cached_image_view_ids[index]; | ||
| 537 | if (is_new) { | ||
| 538 | image_view_id = FindImageView(descriptor); | ||
| 539 | } | ||
| 540 | if (image_view_id != NULL_IMAGE_VIEW_ID) { | ||
| 541 | PrepareImageView(image_view_id, false, false); | ||
| 542 | } | ||
| 543 | return image_view_id; | ||
| 544 | } | ||
| 483 | 545 | ||
| 484 | bool IsDepth() const { | 546 | template <class P> |
| 485 | return surface->GetSurfaceParams().IsPixelFormatZeta(); | 547 | FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) { |
| 486 | } | 548 | const auto [pair, is_new] = framebuffers.try_emplace(key); |
| 487 | }; | 549 | FramebufferId& framebuffer_id = pair->second; |
| 550 | if (!is_new) { | ||
| 551 | return framebuffer_id; | ||
| 552 | } | ||
| 553 | std::array<ImageView*, NUM_RT> color_buffers; | ||
| 554 | std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), | ||
| 555 | [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); | ||
| 556 | ImageView* const depth_buffer = | ||
| 557 | key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; | ||
| 558 | framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); | ||
| 559 | return framebuffer_id; | ||
| 560 | } | ||
| 488 | 561 | ||
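GetFramebufferId leans on a small idiom worth spelling out: try_emplace performs the find and the insert in a single hash lookup, and the returned reference is filled in only when the key was new. A generic sketch of the same idiom (expensive_build is a stand-in for framebuffer creation):

#include <string>
#include <unordered_map>

int expensive_build(const std::string& key) { // stand-in for framebuffer creation
    return static_cast<int>(key.size());
}

int GetOrBuild(std::unordered_map<std::string, int>& cache, const std::string& key) {
    const auto [it, is_new] = cache.try_emplace(key); // one lookup: find or default-construct
    if (is_new) {
        it->second = expensive_build(key); // only pay on first use
    }
    return it->second;
}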
| 489 | /** | 562 | template <class P> |
| 490 | * Takes care of selecting a proper strategy to deal with a texture recycle. | 563 | void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { |
| 491 | * | 564 | ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { |
| 492 | * @param overlaps The overlapping surfaces registered in the cache. | 565 | if (True(image.flags & ImageFlagBits::CpuModified)) { |
| 493 | * @param params The parameters on the new surface. | 566 | return; |
| 494 | * @param gpu_addr The starting address of the new surface. | ||
| 495 | * @param untopological Indicates to the recycler that the texture has no way | ||
| 496 | * to match the overlaps due to topological reasons. | ||
| 497 | **/ | ||
| 498 | RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, | ||
| 499 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { | ||
| 500 | if (Settings::IsGPULevelExtreme()) { | ||
| 501 | return RecycleStrategy::Flush; | ||
| 502 | } | ||
| 503 | // 3D Textures decision | ||
| 504 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 505 | return RecycleStrategy::Flush; | ||
| 506 | } | ||
| 507 | for (const auto& s : overlaps) { | ||
| 508 | const auto& s_params = s->GetSurfaceParams(); | ||
| 509 | if (s_params.target == SurfaceTarget::Texture3D) { | ||
| 510 | return RecycleStrategy::Flush; | ||
| 511 | } | ||
| 512 | } | ||
| 513 | // Untopological decision | ||
| 514 | if (untopological == MatchTopologyResult::CompressUnmatch) { | ||
| 515 | return RecycleStrategy::Flush; | ||
| 516 | } | ||
| 517 | if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { | ||
| 518 | return RecycleStrategy::Flush; | ||
| 519 | } | ||
| 520 | return RecycleStrategy::Ignore; | ||
| 521 | } | ||
| 522 | |||
| 523 | /** | ||
| 524 | * Used to decide what to do with textures we can't resolve in the cache. It has 2 implemented | ||
| 525 | * strategies: Ignore and Flush. | ||
| 526 | * | ||
| 527 | * - Ignore: Just unregisters all the overlaps and loads the new texture. | ||
| 528 | * - Flush: Flushes all the overlaps into memory and loads the new surface from that data. | ||
| 529 | * | ||
| 530 | * @param overlaps The overlapping surfaces registered in the cache. | ||
| 531 | * @param params The parameters for the new surface. | ||
| 532 | * @param gpu_addr The starting address of the new surface. | ||
| 533 | * @param preserve_contents Indicates that the new surface should be loaded from memory or left | ||
| 534 | * blank. | ||
| 535 | * @param untopological Indicates to the recycler that the texture has no way to match the | ||
| 536 | * overlaps due to topological reasons. | ||
| 537 | **/ | ||
| 538 | std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, | ||
| 539 | const GPUVAddr gpu_addr, const bool preserve_contents, | ||
| 540 | const MatchTopologyResult untopological) { | ||
| 541 | const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); | ||
| 542 | for (auto& surface : overlaps) { | ||
| 543 | Unregister(surface); | ||
| 544 | } | ||
| 545 | switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { | ||
| 546 | case RecycleStrategy::Ignore: { | ||
| 547 | return InitializeSurface(gpu_addr, params, do_load); | ||
| 548 | } | ||
| 549 | case RecycleStrategy::Flush: { | ||
| 550 | std::sort(overlaps.begin(), overlaps.end(), | ||
| 551 | [](const TSurface& a, const TSurface& b) -> bool { | ||
| 552 | return a->GetModificationTick() < b->GetModificationTick(); | ||
| 553 | }); | ||
| 554 | for (auto& surface : overlaps) { | ||
| 555 | FlushSurface(surface); | ||
| 556 | } | ||
| 557 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 558 | } | 567 | } |
| 559 | case RecycleStrategy::BufferCopy: { | 568 | image.flags |= ImageFlagBits::CpuModified; |
| 560 | auto new_surface = GetUncachedSurface(gpu_addr, params); | 569 | UntrackImage(image); |
| 561 | BufferCopy(overlaps[0], new_surface); | 570 | }); |
| 562 | return {new_surface, new_surface->GetMainView()}; | 571 | } |
| 572 | |||
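WriteMemory, DownloadMemory and UnmapMemory all funnel through ForEachImageInRegion, which walks the page_table declared in the class body (note the IdentityHash: page indices are already well distributed, so hashing them again buys nothing). A sketch of the walk, with an assumed PAGE_BITS and without the duplicate filtering the real cache performs:

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

constexpr std::size_t PAGE_BITS = 20; // assumed bucket size for this sketch

template <typename T>
struct IdentityHash {
    std::size_t operator()(T value) const noexcept { return static_cast<std::size_t>(value); }
};

using ImageId = std::uint32_t;
using PageTable = std::unordered_map<std::uint64_t, std::vector<ImageId>, IdentityHash<std::uint64_t>>;

template <typename Func>
void ForEachImageInRegionSketch(const PageTable& table, std::uint64_t cpu_addr, std::size_t size,
                                Func&& func) {
    const std::uint64_t first_page = cpu_addr >> PAGE_BITS;
    const std::uint64_t last_page = (cpu_addr + size - 1) >> PAGE_BITS;
    for (std::uint64_t page = first_page; page <= last_page; ++page) {
        const auto it = table.find(page);
        if (it == table.end()) {
            continue;
        }
        for (const ImageId image_id : it->second) {
            func(image_id); // the real walk also dedups images spanning several pages
        }
    }
}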
| 573 | template <class P> | ||
| 574 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | ||
| 575 | std::vector<ImageId> images; | ||
| 576 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { | ||
| 577 | // Skip images that were not modified from the GPU | ||
| 578 | if (False(image.flags & ImageFlagBits::GpuModified)) { | ||
| 579 | return; | ||
| 563 | } | 580 | } |
| 564 | default: { | 581 | // Skip images that *are* modified from the CPU |
| 565 | UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); | 582 | // We don't want to write sensitive data from the guest |
| 566 | return InitializeSurface(gpu_addr, params, do_load); | 583 | if (True(image.flags & ImageFlagBits::CpuModified)) { |
| 584 | return; | ||
| 567 | } | 585 | } |
| 586 | if (image.info.num_samples > 1) { | ||
| 587 | LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); | ||
| 588 | return; | ||
| 568 | } | 589 | } |
| 590 | image.flags &= ~ImageFlagBits::GpuModified; | ||
| 591 | images.push_back(image_id); | ||
| 592 | }); | ||
| 593 | if (images.empty()) { | ||
| 594 | return; | ||
| 595 | } | ||
| 596 | std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { | ||
| 597 | return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; | ||
| 598 | }); | ||
| 599 | for (const ImageId image_id : images) { | ||
| 600 | Image& image = slot_images[image_id]; | ||
| 601 | auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes); | ||
| 602 | const auto copies = FullDownloadCopies(image.info); | ||
| 603 | image.DownloadMemory(map, 0, copies); | ||
| 604 | runtime.Finish(); | ||
| 605 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span()); | ||
| 569 | } | 606 | } |
| 607 | } | ||
| 570 | 608 | ||
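The True()/False() predicates used throughout these functions read better than raw bit tests on a scoped enum. A self-contained sketch of the presumed helpers (the real ImageFlagBits and its operators live in common code; Flags here is illustrative):

#include <cstdint>

enum class Flags : std::uint32_t { None = 0, CpuModified = 1 << 0, GpuModified = 1 << 1 };

constexpr Flags operator&(Flags a, Flags b) {
    return static_cast<Flags>(static_cast<std::uint32_t>(a) & static_cast<std::uint32_t>(b));
}
constexpr Flags operator|(Flags a, Flags b) {
    return static_cast<Flags>(static_cast<std::uint32_t>(a) | static_cast<std::uint32_t>(b));
}
constexpr bool True(Flags f) { return f != Flags::None; }  // any queried bit set
constexpr bool False(Flags f) { return f == Flags::None; } // no queried bit set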
| 571 | /** | 609 | template <class P> |
| 572 | * Takes a single surface and recreates into another that may differ in | 610 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { |
| 573 | * format, target or width alignment. | 611 | std::vector<ImageId> deleted_images; |
| 574 | * | 612 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 575 | * @param current_surface The registered surface in the cache which we want to convert. | 613 | for (const ImageId id : deleted_images) { |
| 576 | * @param params The new surface params which we'll use to recreate the surface. | 614 | Image& image = slot_images[id]; |
| 577 | * @param is_render Whether or not the surface is a render target. | 615 | if (True(image.flags & ImageFlagBits::Tracked)) { |
| 578 | **/ | 616 | UntrackImage(image); |
| 579 | std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params, | 617 | } |
| 580 | bool is_render) { | 618 | UnregisterImage(id); |
| 581 | const auto gpu_addr = current_surface->GetGpuAddr(); | 619 | DeleteImage(id); |
| 582 | const auto& cr_params = current_surface->GetSurfaceParams(); | 620 | } |
| 583 | TSurface new_surface; | 621 | } |
| 584 | if (cr_params.pixel_format != params.pixel_format && !is_render && | ||
| 585 | GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) { | ||
| 586 | SurfaceParams new_params = params; | ||
| 587 | new_params.pixel_format = cr_params.pixel_format; | ||
| 588 | new_params.type = cr_params.type; | ||
| 589 | new_surface = GetUncachedSurface(gpu_addr, new_params); | ||
| 590 | } else { | ||
| 591 | new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 592 | } | ||
| 593 | const SurfaceParams& final_params = new_surface->GetSurfaceParams(); | ||
| 594 | if (cr_params.type != final_params.type) { | ||
| 595 | if (Settings::IsGPULevelExtreme()) { | ||
| 596 | BufferCopy(current_surface, new_surface); | ||
| 597 | } | ||
| 598 | } else { | ||
| 599 | std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); | ||
| 600 | for (auto& brick : bricks) { | ||
| 601 | TryCopyImage(current_surface, new_surface, brick); | ||
| 602 | } | ||
| 603 | } | ||
| 604 | Unregister(current_surface); | ||
| 605 | Register(new_surface); | ||
| 606 | new_surface->MarkAsModified(current_surface->IsModified(), Tick()); | ||
| 607 | return {new_surface, new_surface->GetMainView()}; | ||
| 608 | } | ||
| 609 | |||
| 610 | /** | ||
| 611 | * Takes a single surface and checks it against the new surface's params. If it's an exact | ||
| 612 | * match, we return the main view of the registered surface. If its formats don't | ||
| 613 | * match, we rebuild the surface. We call this last method a `Mirage`. If formats | ||
| 614 | * match but the targets don't, we create an overview View of the registered surface. | ||
| 615 | * | ||
| 616 | * @param current_surface The registered surface in the cache which we want to convert. | ||
| 617 | * @param params The new surface params which we want to check. | ||
| 618 | * @param is_render Whether or not the surface is a render target. | ||
| 619 | **/ | ||
| 620 | std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, | ||
| 621 | const SurfaceParams& params, bool is_render) { | ||
| 622 | const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); | ||
| 623 | const bool matches_target = current_surface->MatchTarget(params.target); | ||
| 624 | const auto match_check = [&]() -> std::pair<TSurface, TView> { | ||
| 625 | if (matches_target) { | ||
| 626 | return {current_surface, current_surface->GetMainView()}; | ||
| 627 | } | ||
| 628 | return {current_surface, current_surface->EmplaceOverview(params)}; | ||
| 629 | }; | ||
| 630 | if (!is_mirage) { | ||
| 631 | return match_check(); | ||
| 632 | } | ||
| 633 | if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) { | ||
| 634 | return match_check(); | ||
| 635 | } | ||
| 636 | return RebuildSurface(current_surface, params, is_render); | ||
| 637 | } | ||
| 638 | |||
| 639 | /** | ||
| 640 | * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate | ||
| 641 | * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps | ||
| 642 | * of the new surface, if they all match we end up recreating a surface for them, | ||
| 643 | * else we return nothing. | ||
| 644 | * | ||
| 645 | * @param overlaps The overlapping surfaces registered in the cache. | ||
| 646 | * @param params The parameters on the new surface. | ||
| 647 | * @param gpu_addr The starting address of the new surface. | ||
| 648 | **/ | ||
| 649 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps, | ||
| 650 | const SurfaceParams& params, | ||
| 651 | GPUVAddr gpu_addr) { | ||
| 652 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 653 | return std::nullopt; | ||
| 654 | } | ||
| 655 | const auto test_modified = [](TSurface& surface) { return surface->IsModified(); }; | ||
| 656 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 657 | 622 | ||
| 658 | if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) { | 623 | template <class P> |
| 659 | LoadSurface(new_surface); | 624 | void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 660 | for (const auto& surface : overlaps) { | 625 | const Tegra::Engines::Fermi2D::Surface& src, |
| 661 | Unregister(surface); | 626 | const Tegra::Engines::Fermi2D::Config& copy) { |
| 662 | } | 627 | const BlitImages images = GetBlitImages(dst, src); |
| 663 | Register(new_surface); | 628 | const ImageId dst_id = images.dst_id; |
| 664 | return {{new_surface, new_surface->GetMainView()}}; | 629 | const ImageId src_id = images.src_id; |
| 665 | } | 630 | PrepareImage(src_id, false, false); |
| 631 | PrepareImage(dst_id, true, false); | ||
| 632 | |||
| 633 | ImageBase& dst_image = slot_images[dst_id]; | ||
| 634 | const ImageBase& src_image = slot_images[src_id]; | ||
| 635 | |||
| 636 | // TODO: Deduplicate | ||
| 637 | const std::optional dst_base = dst_image.TryFindBase(dst.Address()); | ||
| 638 | const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; | ||
| 639 | const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); | ||
| 640 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | ||
| 641 | const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); | ||
| 642 | const std::array src_region{ | ||
| 643 | Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, | ||
| 644 | Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, | ||
| 645 | }; | ||
| 666 | 646 | ||
| 667 | std::size_t passed_tests = 0; | 647 | const std::optional src_base = src_image.TryFindBase(src.Address()); |
| 668 | for (auto& surface : overlaps) { | 648 | const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; |
| 669 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | 649 | const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); |
| 670 | const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; | 650 | const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); |
| 671 | if (!mipmap_layer) { | 651 | const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); |
| 672 | continue; | 652 | const std::array dst_region{ |
| 673 | } | 653 | Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, |
| 674 | const auto [base_layer, base_mipmap] = *mipmap_layer; | 654 | Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, |
| 675 | if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { | 655 | }; |
| 676 | continue; | ||
| 677 | } | ||
| 678 | ++passed_tests; | ||
| 679 | |||
| 680 | // Copy all mipmaps and layers | ||
| 681 | const u32 block_width = params.GetDefaultBlockWidth(); | ||
| 682 | const u32 block_height = params.GetDefaultBlockHeight(); | ||
| 683 | for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) { | ||
| 684 | const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); | ||
| 685 | const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); | ||
| 686 | if (width < block_width || height < block_height) { | ||
| 687 | // Current APIs forbid copying small compressed textures, avoid errors | ||
| 688 | break; | ||
| 689 | } | ||
| 690 | const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, | ||
| 691 | src_params.depth); | ||
| 692 | TryCopyImage(surface, new_surface, copy_params); | ||
| 693 | } | ||
| 694 | } | ||
| 695 | if (passed_tests == 0) { | ||
| 696 | return std::nullopt; | ||
| 697 | } | ||
| 698 | if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { | ||
| 699 | // In Accurate GPU mode all tests should pass, else we recycle | ||
| 700 | return std::nullopt; | ||
| 701 | } | ||
| 702 | 656 | ||
| 703 | const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified); | 657 | // Always call this after src_framebuffer_id was queried, as the address might be invalidated. |
| 704 | for (const auto& surface : overlaps) { | 658 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; |
| 705 | Unregister(surface); | 659 | if constexpr (FRAMEBUFFER_BLITS) { |
| 706 | } | 660 | // OpenGL blits from framebuffers, not images |
| 661 | Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; | ||
| 662 | runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, | ||
| 663 | copy.filter, copy.operation); | ||
| 664 | } else { | ||
| 665 | // Vulkan can blit images, but it lacks format reinterpretations | ||
| 666 | // Provide a framebuffer in case it's necessary | ||
| 667 | ImageView& dst_view = slot_image_views[dst_view_id]; | ||
| 668 | ImageView& src_view = slot_image_views[src_view_id]; | ||
| 669 | runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, | ||
| 670 | copy.operation); | ||
| 671 | } | ||
| 672 | } | ||
| 707 | 673 | ||
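The FRAMEBUFFER_BLITS branch in BlitImage is a compile-time split on the backend policy P, so each backend only has to implement the entry point its branch actually uses. A reduced sketch of that dispatch (OpenGLPolicy and VulkanPolicy are illustrative stand-ins, not the project's runtime types):

// The untaken if-constexpr branch is discarded at instantiation time, so
// OpenGLPolicy never needs BlitImage and VulkanPolicy never needs
// BlitFramebuffer.
struct OpenGLPolicy {
    static constexpr bool FRAMEBUFFER_BLITS = true;
    static void BlitFramebuffer() {}
};

struct VulkanPolicy {
    static constexpr bool FRAMEBUFFER_BLITS = false;
    static void BlitImage() {}
};

template <class P>
void Blit() {
    if constexpr (P::FRAMEBUFFER_BLITS) {
        P::BlitFramebuffer(); // OpenGL-style backends blit between framebuffers
    } else {
        P::BlitImage();       // Vulkan-style backends blit images directly
    }
}

// Usage: Blit<OpenGLPolicy>(); Blit<VulkanPolicy>();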
| 708 | new_surface->MarkAsModified(modified, Tick()); | 674 | template <class P> |
| 709 | Register(new_surface); | 675 | void TextureCache<P>::InvalidateColorBuffer(size_t index) { |
| 710 | return {{new_surface, new_surface->GetMainView()}}; | 676 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; |
| 711 | } | 677 | color_buffer_id = FindColorBuffer(index, false); |
| 712 | 678 | if (!color_buffer_id) { | |
| 713 | /** | 679 | LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index); |
| 714 | * Takes care of managing 3D textures and their slices. Uses HLE methods for reconstructing the 3D | 680 | return; |
| 715 | * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of | 681 | } |
| 716 | * the HLE methods. | 682 | // When invalidating a color buffer, the old contents are no longer relevant |
| 717 | * | 683 | ImageView& color_buffer = slot_image_views[color_buffer_id]; |
| 718 | * @param overlaps The overlapping surfaces registered in the cache. | 684 | Image& image = slot_images[color_buffer.image_id]; |
| 719 | * @param params The parameters on the new surface. | 685 | image.flags &= ~ImageFlagBits::CpuModified; |
| 720 | * @param gpu_addr The starting address of the new surface. | 686 | image.flags &= ~ImageFlagBits::GpuModified; |
| 721 | * @param cpu_addr The starting address of the new surface on physical memory. | ||
| 722 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | ||
| 723 | * left blank. | ||
| 724 | */ | ||
| 725 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, | ||
| 726 | const SurfaceParams& params, | ||
| 727 | GPUVAddr gpu_addr, VAddr cpu_addr, | ||
| 728 | bool preserve_contents) { | ||
| 729 | if (params.target != SurfaceTarget::Texture3D) { | ||
| 730 | for (const auto& surface : overlaps) { | ||
| 731 | if (!surface->MatchTarget(params.target)) { | ||
| 732 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { | ||
| 733 | if (Settings::IsGPULevelExtreme()) { | ||
| 734 | return std::nullopt; | ||
| 735 | } | ||
| 736 | Unregister(surface); | ||
| 737 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 738 | } | ||
| 739 | return std::nullopt; | ||
| 740 | } | ||
| 741 | if (surface->GetCpuAddr() != cpu_addr) { | ||
| 742 | continue; | ||
| 743 | } | ||
| 744 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { | ||
| 745 | return std::make_pair(surface, surface->GetMainView()); | ||
| 746 | } | ||
| 747 | } | ||
| 748 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 749 | } | ||
| 750 | 687 | ||
| 751 | if (params.num_levels > 1) { | 688 | runtime.InvalidateColorBuffer(color_buffer, index); |
| 752 | // We can't handle mipmaps in 3D textures yet, better fall back to the LLE approach | 689 | } |
| 753 | return std::nullopt; | ||
| 754 | } | ||
| 755 | 690 | ||
| 756 | if (overlaps.size() == 1) { | 691 | template <class P> |
| 757 | const auto& surface = overlaps[0]; | 692 | void TextureCache<P>::InvalidateDepthBuffer() { |
| 758 | const SurfaceParams& overlap_params = surface->GetSurfaceParams(); | 693 | ImageViewId& depth_buffer_id = render_targets.depth_buffer_id; |
| 759 | // Don't attempt to render to textures with more than one level for now | 694 | depth_buffer_id = FindDepthBuffer(false); |
| 760 | // The texture has to be to the right of or at the same address if we want to render to it | 695 | if (!depth_buffer_id) { |
| 761 | if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) { | 696 | LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer"); |
| 762 | const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr()); | 697 | return; |
| 763 | const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); | 698 | } |
| 764 | if (slice < overlap_params.depth) { | 699 | // When invalidating the depth buffer, the old contents are no longer relevant |
| 765 | auto view = surface->Emplace3DView(slice, params.depth, 0, 1); | 700 | ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id]; |
| 766 | return std::make_pair(std::move(surface), std::move(view)); | 701 | image.flags &= ~ImageFlagBits::CpuModified; |
| 767 | } | 702 | image.flags &= ~ImageFlagBits::GpuModified; |
| 768 | } | ||
| 769 | } | ||
| 770 | 703 | ||
| 771 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | 704 | ImageView& depth_buffer = slot_image_views[depth_buffer_id]; |
| 772 | bool modified = false; | 705 | runtime.InvalidateDepthBuffer(depth_buffer); |
| 706 | } | ||
| 773 | 707 | ||
| 774 | for (auto& surface : overlaps) { | 708 | template <class P> |
| 775 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | 709 | typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) { |
| 776 | if (src_params.target != SurfaceTarget::Texture2D || | 710 | // TODO: Properly implement this |
| 777 | src_params.height != params.height || | 711 | const auto it = page_table.find(cpu_addr >> PAGE_BITS); |
| 778 | src_params.block_depth != params.block_depth || | 712 | if (it == page_table.end()) { |
| 779 | src_params.block_height != params.block_height) { | 713 | return nullptr; |
| 780 | return std::nullopt; | 714 | } |
| 781 | } | 715 | const auto& image_ids = it->second; |
| 782 | modified |= surface->IsModified(); | 716 | for (const ImageId image_id : image_ids) { |
| 783 | 717 | const ImageBase& image = slot_images[image_id]; | |
| 784 | const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); | 718 | if (image.cpu_addr != cpu_addr) { |
| 785 | const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); | 719 | continue; |
| 786 | const u32 width = params.width; | ||
| 787 | const u32 height = params.height; | ||
| 788 | const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); | ||
| 789 | TryCopyImage(surface, new_surface, copy_params); | ||
| 790 | } | 720 | } |
| 791 | for (const auto& surface : overlaps) { | 721 | if (image.image_view_ids.empty()) { |
| 792 | Unregister(surface); | 722 | continue; |
| 793 | } | 723 | } |
| 794 | new_surface->MarkAsModified(modified, Tick()); | 724 | return &slot_image_views[image.image_view_ids.at(0)]; |
| 795 | Register(new_surface); | 725 | } |
| 796 | 726 | return nullptr; | |
| 797 | TView view = new_surface->GetMainView(); | 727 | } |
| 798 | return std::make_pair(std::move(new_surface), std::move(view)); | ||
| 799 | } | ||
| 800 | |||
| 801 | /** | ||
| 802 | * Gets the starting address and parameters of a candidate surface and tries | ||
| 803 | * to find a matching surface within the cache. This is done in 3 big steps: | ||
| 804 | * | ||
| 805 | * 1. Check the 1st Level Cache in order to find an exact match; if we fail, we move to step 2. | ||
| 806 | * | ||
| 807 | * 2. Check if there are any overlaps at all; if there are none, we just load the texture from | ||
| 808 | * memory, else we move to step 3. | ||
| 809 | * | ||
| 810 | * 3. Consists of figuring out the relationship between the candidate texture and the | ||
| 811 | * overlaps. We divide the scenarios depending on whether there's 1 or many overlaps. If | ||
| 812 | * there's many, we just try to reconstruct a new surface out of them based on the | ||
| 813 | * candidate's parameters; if we fail, we recycle. When there's only 1 overlap, we | ||
| 814 | * have to check if the candidate is a view (layer/mipmap) of the overlap or if the | ||
| 815 | * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct | ||
| 816 | * a new surface. | ||
| 817 | * | ||
| 818 | * @param gpu_addr The starting address of the candidate surface. | ||
| 819 | * @param params The parameters on the candidate surface. | ||
| 820 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | ||
| 821 | * left blank. | ||
| 822 | * @param is_render Whether or not the surface is a render target. | ||
| 823 | **/ | ||
| 824 | std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, | ||
| 825 | const SurfaceParams& params, bool preserve_contents, | ||
| 826 | bool is_render) { | ||
| 827 | // Step 1 | ||
| 828 | // Check Level 1 Cache for a fast structural match. If candidate surface | ||
| 829 | // matches at certain level we are pretty much done. | ||
| 830 | if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) { | ||
| 831 | TSurface& current_surface = iter->second; | ||
| 832 | const auto topological_result = current_surface->MatchesTopology(params); | ||
| 833 | if (topological_result != MatchTopologyResult::FullMatch) { | ||
| 834 | VectorSurface overlaps{current_surface}; | ||
| 835 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 836 | topological_result); | ||
| 837 | } | ||
| 838 | 728 | ||
| 839 | const auto struct_result = current_surface->MatchesStructure(params); | 729 | template <class P> |
| 840 | if (struct_result != MatchStructureResult::None) { | 730 | bool TextureCache<P>::HasUncommittedFlushes() const noexcept { |
| 841 | const auto& old_params = current_surface->GetSurfaceParams(); | 731 | return !uncommitted_downloads.empty(); |
| 842 | const bool not_3d = params.target != SurfaceTarget::Texture3D && | 732 | } |
| 843 | old_params.target != SurfaceTarget::Texture3D; | ||
| 844 | if (not_3d || current_surface->MatchTarget(params.target)) { | ||
| 845 | if (struct_result == MatchStructureResult::FullMatch) { | ||
| 846 | return ManageStructuralMatch(current_surface, params, is_render); | ||
| 847 | } else { | ||
| 848 | return RebuildSurface(current_surface, params, is_render); | ||
| 849 | } | ||
| 850 | } | ||
| 851 | } | ||
| 852 | } | ||
| 853 | 733 | ||
| 854 | // Step 2 | 734 | template <class P> |
| 855 | // Obtain all possible overlaps in the memory region | 735 | bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept { |
| 856 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); | 736 | return !committed_downloads.empty() && !committed_downloads.front().empty(); |
| 857 | auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)}; | 737 | } |
| 858 | 738 | ||
| 859 | // If none are found, we are done; we just load the surface and create it. | 739 | template <class P> |
| 860 | if (overlaps.empty()) { | 740 | void TextureCache<P>::CommitAsyncFlushes() { |
| 861 | return InitializeSurface(gpu_addr, params, preserve_contents); | 741 | // This is intentionally passing the value by copy |
| 862 | } | 742 | committed_downloads.push(uncommitted_downloads); |
| 743 | uncommitted_downloads.clear(); | ||
| 744 | } | ||
| 863 | 745 | ||
| 864 | // Step 3 | 746 | template <class P> |
| 865 | // Now we need to figure out the relationship between the texture and its overlaps; | 747 | void TextureCache<P>::PopAsyncFlushes() { |
| 866 | // we do a topological test to ensure we can find some relationship. If it fails, | 748 | if (committed_downloads.empty()) { |
| 867 | // immediately recycle the texture | 749 | return; |
| 868 | for (const auto& surface : overlaps) { | 750 | } |
| 869 | const auto topological_result = surface->MatchesTopology(params); | 751 | const std::span<const ImageId> download_ids = committed_downloads.front(); |
| 870 | if (topological_result != MatchTopologyResult::FullMatch) { | 752 | if (download_ids.empty()) { |
| 871 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 753 | committed_downloads.pop(); |
| 872 | topological_result); | 754 | return; |
| 873 | } | 755 | } |
| 874 | } | 756 | size_t total_size_bytes = 0; |
| 757 | for (const ImageId image_id : download_ids) { | ||
| 758 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | ||
| 759 | } | ||
| 760 | auto download_map = runtime.MapDownloadBuffer(total_size_bytes); | ||
| 761 | size_t buffer_offset = 0; | ||
| 762 | for (const ImageId image_id : download_ids) { | ||
| 763 | Image& image = slot_images[image_id]; | ||
| 764 | const auto copies = FullDownloadCopies(image.info); | ||
| 765 | image.DownloadMemory(download_map, buffer_offset, copies); | ||
| 766 | buffer_offset += image.unswizzled_size_bytes; | ||
| 767 | } | ||
| 768 | // Wait for downloads to finish | ||
| 769 | runtime.Finish(); | ||
| 770 | |||
| 771 | buffer_offset = 0; | ||
| 772 | const std::span<u8> download_span = download_map.Span(); | ||
| 773 | for (const ImageId image_id : download_ids) { | ||
| 774 | const ImageBase& image = slot_images[image_id]; | ||
| 775 | const auto copies = FullDownloadCopies(image.info); | ||
| 776 | const std::span<u8> image_download_span = download_span.subspan(buffer_offset); | ||
| 777 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); | ||
| 778 | buffer_offset += image.unswizzled_size_bytes; | ||
| 779 | } | ||
| 780 | committed_downloads.pop(); | ||
| 781 | } | ||
| 875 | 782 | ||
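PopAsyncFlushes sizes every pending download first so that one staging buffer and one runtime.Finish() cover the whole batch, instead of stalling once per image. The shape of that flow, sketched with hypothetical types:

#include <cstddef>
#include <span>
#include <vector>

struct PendingImage {
    std::size_t size_bytes;
};

// Sketch: one staging allocation and one wait for N images.
void DrainDownloads(std::span<const PendingImage> images, std::span<std::byte> staging) {
    std::size_t offset = 0;
    for (const PendingImage& image : images) {
        // enqueue GPU -> staging copy into [offset, offset + image.size_bytes)
        offset += image.size_bytes;
    }
    // a single fence wait would go here, instead of one per image
    offset = 0;
    for (const PendingImage& image : images) {
        const std::span<std::byte> data = staging.subspan(offset, image.size_bytes);
        (void)data; // the real cache swizzles this back into guest memory
        offset += image.size_bytes;
    }
}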
| 876 | // Manage 3D textures | 783 | template <class P> |
| 877 | if (params.block_depth > 0) { | 784 | bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { |
| 878 | auto surface = | 785 | bool is_modified = false; |
| 879 | Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); | 786 | ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { |
| 880 | if (surface) { | 787 | if (False(image.flags & ImageFlagBits::GpuModified)) { |
| 881 | return *surface; | 788 | return false; |
| 882 | } | ||
| 883 | } | 789 | } |
| 790 | is_modified = true; | ||
| 791 | return true; | ||
| 792 | }); | ||
| 793 | return is_modified; | ||
| 794 | } | ||
| 884 | 795 | ||
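IsRegionGpuModified relies on a convention of these region walks: when the callback returns bool, returning true can stop the iteration early, which keeps existence queries cheap on large regions. A sketch of how a walker can support both void and bool callbacks (ForEach and the int items are illustrative):

#include <type_traits>
#include <vector>

template <typename Func>
void ForEach(const std::vector<int>& items, Func&& func) {
    for (const int item : items) {
        if constexpr (std::is_same_v<std::invoke_result_t<Func, int>, bool>) {
            if (func(item)) {
                return; // callback requested an early exit
            }
        } else {
            func(item); // void callback: always visit everything
        }
    }
}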
| 885 | // Split cases between 1 overlap or many. | 796 | template <class P> |
| 886 | if (overlaps.size() == 1) { | 797 | void TextureCache<P>::RefreshContents(Image& image) { |
| 887 | TSurface current_surface = overlaps[0]; | 798 | if (False(image.flags & ImageFlagBits::CpuModified)) { |
| 888 | // First check if the surface is within the overlap. If not, it means | 799 | // Only upload modified images |
| 889 | // two things either the candidate surface is a supertexture of the overlap | 800 | return; |
| 890 | // or they don't match in any known way. | 801 | } |
| 891 | if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { | 802 | image.flags &= ~ImageFlagBits::CpuModified; |
| 892 | const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr); | 803 | TrackImage(image); |
| 893 | if (view) { | ||
| 894 | return *view; | ||
| 895 | } | ||
| 896 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 897 | MatchTopologyResult::FullMatch); | ||
| 898 | } | ||
| 899 | // Now we check if the candidate is a mipmap/layer of the overlap | ||
| 900 | std::optional<TView> view = | ||
| 901 | current_surface->EmplaceView(params, gpu_addr, candidate_size); | ||
| 902 | if (view) { | ||
| 903 | const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); | ||
| 904 | if (is_mirage) { | ||
| 905 | // On a mirage view, we need to recreate the surface under this new view | ||
| 906 | // and then obtain a view again. | ||
| 907 | SurfaceParams new_params = current_surface->GetSurfaceParams(); | ||
| 908 | const u32 wh = SurfaceParams::ConvertWidth( | ||
| 909 | new_params.width, new_params.pixel_format, params.pixel_format); | ||
| 910 | const u32 hh = SurfaceParams::ConvertHeight( | ||
| 911 | new_params.height, new_params.pixel_format, params.pixel_format); | ||
| 912 | new_params.width = wh; | ||
| 913 | new_params.height = hh; | ||
| 914 | new_params.pixel_format = params.pixel_format; | ||
| 915 | std::pair<TSurface, TView> pair = | ||
| 916 | RebuildSurface(current_surface, new_params, is_render); | ||
| 917 | std::optional<TView> mirage_view = | ||
| 918 | pair.first->EmplaceView(params, gpu_addr, candidate_size); | ||
| 919 | if (mirage_view) | ||
| 920 | return {pair.first, *mirage_view}; | ||
| 921 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 922 | MatchTopologyResult::FullMatch); | ||
| 923 | } | ||
| 924 | return {current_surface, *view}; | ||
| 925 | } | ||
| 926 | } else { | ||
| 927 | // If there are many overlaps, odds are they are subtextures of the candidate | ||
| 928 | // surface. We try to construct a new surface based on the candidate parameters, | ||
| 929 | // using the overlaps. If a single overlap fails, this will fail. | ||
| 930 | std::optional<std::pair<TSurface, TView>> view = | ||
| 931 | TryReconstructSurface(overlaps, params, gpu_addr); | ||
| 932 | if (view) { | ||
| 933 | return *view; | ||
| 934 | } | ||
| 935 | } | ||
| 936 | // We failed all the tests, recycle the overlaps into a new texture. | ||
| 937 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 938 | MatchTopologyResult::FullMatch); | ||
| 939 | } | ||
| 940 | |||
| 941 | /** | ||
| 942 | * Gets the starting address and parameters of a candidate surface and tries to find a | ||
| 943 | * matching surface within the cache that's similar to it. If there are many textures | ||
| 944 | * or the texture found is entirely incompatible, it will fail. If no texture is found, the | ||
| 945 | * blit will be unsuccessful. | ||
| 946 | * | ||
| 947 | * @param gpu_addr The starting address of the candidate surface. | ||
| 948 | * @param params The parameters on the candidate surface. | ||
| 949 | **/ | ||
| 950 | Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { | ||
| 951 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 952 | |||
| 953 | if (!cpu_addr) { | ||
| 954 | Deduction result{}; | ||
| 955 | result.type = DeductionType::DeductionFailed; | ||
| 956 | return result; | ||
| 957 | } | ||
| 958 | 804 | ||
| 959 | if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { | 805 | if (image.info.num_samples > 1) { |
| 960 | TSurface& current_surface = iter->second; | 806 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); |
| 961 | const auto topological_result = current_surface->MatchesTopology(params); | 807 | return; |
| 962 | if (topological_result != MatchTopologyResult::FullMatch) { | 808 | } |
| 963 | Deduction result{}; | 809 | auto map = runtime.MapUploadBuffer(MapSizeBytes(image)); |
| 964 | result.type = DeductionType::DeductionFailed; | 810 | UploadImageContents(image, map, 0); |
| 965 | return result; | 811 | runtime.InsertUploadMemoryBarrier(); |
| 966 | } | 812 | } |
| 967 | const auto struct_result = current_surface->MatchesStructure(params); | ||
| 968 | if (struct_result != MatchStructureResult::None && | ||
| 969 | current_surface->MatchTarget(params.target)) { | ||
| 970 | Deduction result{}; | ||
| 971 | result.type = DeductionType::DeductionComplete; | ||
| 972 | result.surface = current_surface; | ||
| 973 | return result; | ||
| 974 | } | ||
| 975 | } | ||
| 976 | 813 | ||
| 977 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); | 814 | template <class P> |
| 978 | auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; | 815 | template <typename MapBuffer> |
| 816 | void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { | ||
| 817 | const std::span<u8> mapped_span = map.Span().subspan(buffer_offset); | ||
| 818 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 819 | |||
| 820 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | ||
| 821 | gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); | ||
| 822 | const auto uploads = FullUploadSwizzles(image.info); | ||
| 823 | runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); | ||
| 824 | } else if (True(image.flags & ImageFlagBits::Converted)) { | ||
| 825 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); | ||
| 826 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); | ||
| 827 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); | ||
| 828 | image.UploadMemory(map, buffer_offset, copies); | ||
| 829 | } else if (image.info.type == ImageType::Buffer) { | ||
| 830 | const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; | ||
| 831 | image.UploadMemory(map, buffer_offset, copies); | ||
| 832 | } else { | ||
| 833 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); | ||
| 834 | image.UploadMemory(map, buffer_offset, copies); | ||
| 835 | } | ||
| 836 | } | ||
| 979 | 837 | ||
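Of the four paths in UploadImageContents, the Converted one is the least obvious: tiled guest data is linearized into scratch memory first, then converted (for example, ASTC decoded into a host-supported format) into the staging mapping. A sketch under those assumptions (Unswizzle and Convert are hypothetical stand-ins, not the project's helpers):

#include <cstddef>
#include <cstdint>
#include <span>
#include <vector>

void Unswizzle(std::span<std::uint8_t> linear) {
    (void)linear; // stand-in: read tiled guest data into linear texel order
}
void Convert(std::span<const std::uint8_t> src, std::span<std::uint8_t> dst) {
    (void)src;
    (void)dst; // stand-in: e.g. decode ASTC blocks into RGBA8
}

void UploadConverted(std::span<std::uint8_t> staging, std::size_t guest_size_bytes) {
    std::vector<std::uint8_t> linear(guest_size_bytes);
    Unswizzle(linear);        // guest tiled layout -> linear texels
    Convert(linear, staging); // linear texels -> host-supported format in staging
}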
| 980 | if (overlaps.empty()) { | 838 | template <class P> |
| 981 | Deduction result{}; | 839 | ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { |
| 982 | result.type = DeductionType::DeductionIncomplete; | 840 | if (!IsValidAddress(gpu_memory, config)) { |
| 983 | return result; | 841 | return NULL_IMAGE_VIEW_ID; |
| 984 | } | 842 | } |
| 843 | const auto [pair, is_new] = image_views.try_emplace(config); | ||
| 844 | ImageViewId& image_view_id = pair->second; | ||
| 845 | if (is_new) { | ||
| 846 | image_view_id = CreateImageView(config); | ||
| 847 | } | ||
| 848 | return image_view_id; | ||
| 849 | } | ||
| 985 | 850 | ||
| 986 | if (overlaps.size() > 1) { | 851 | template <class P> |
| 987 | Deduction result{}; | 852 | ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { |
| 988 | result.type = DeductionType::DeductionFailed; | 853 | const ImageInfo info(config); |
| 989 | return result; | 854 | const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; |
| 990 | } else { | 855 | const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); |
| 991 | Deduction result{}; | 856 | if (!image_id) { |
| 992 | result.type = DeductionType::DeductionComplete; | 857 | return NULL_IMAGE_VIEW_ID; |
| 993 | result.surface = overlaps[0]; | ||
| 994 | return result; | ||
| 995 | } | ||
| 996 | } | 858 | } |
| 859 | ImageBase& image = slot_images[image_id]; | ||
| 860 | const SubresourceBase base = image.TryFindBase(config.Address()).value(); | ||
| 861 | ASSERT(base.level == 0); | ||
| 862 | const ImageViewInfo view_info(config, base.layer); | ||
| 863 | const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); | ||
| 864 | ImageViewBase& image_view = slot_image_views[image_view_id]; | ||
| 865 | image_view.flags |= ImageViewFlagBits::Strong; | ||
| 866 | image.flags |= ImageFlagBits::Strong; | ||
| 867 | return image_view_id; | ||
| 868 | } | ||
| 997 | 869 | ||
| 998 | /** | 870 | template <class P> |
| 999 | * Gets a null surface based on a target texture. | 871 | ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1000 | * @param target The target of the null surface. | 872 | RelaxedOptions options) { |
| 1001 | */ | 873 | if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { |
| 1002 | TView GetNullSurface(SurfaceTarget target) { | 874 | return image_id; |
| 1003 | const u32 i_target = static_cast<u32>(target); | 875 | } |
| 1004 | if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) { | 876 | return InsertImage(info, gpu_addr, options); |
| 1005 | return it->second->GetMainView(); | 877 | } |
| 1006 | } | 878 | |
| 1007 | SurfaceParams params{}; | 879 | template <class P> |
| 1008 | params.target = target; | 880 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1009 | params.is_tiled = false; | 881 | RelaxedOptions options) { |
| 1010 | params.srgb_conversion = false; | 882 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |
| 1011 | params.is_layered = | 883 | if (!cpu_addr) { |
| 1012 | target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray || | 884 | return ImageId{}; |
| 1013 | target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray; | 885 | } |
| 1014 | params.block_width = 0; | 886 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); |
| 1015 | params.block_height = 0; | 887 | ImageId image_id; |
| 1016 | params.block_depth = 0; | 888 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { |
| 1017 | params.tile_width_spacing = 1; | 889 | if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { |
| 1018 | params.width = 1; | 890 | const bool strict_size = False(options & RelaxedOptions::Size) && |
| 1019 | params.height = 1; | 891 | True(existing_image.flags & ImageFlagBits::Strong); |
| 1020 | params.depth = 1; | 892 | const ImageInfo& existing = existing_image.info; |
| 1021 | if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) { | 893 | if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && |
| 1022 | params.depth = 6; | 894 | existing.pitch == info.pitch && |
| 1023 | } | 895 | IsPitchLinearSameSize(existing, info, strict_size) && |
| 1024 | params.pitch = 4; | 896 | IsViewCompatible(existing.format, info.format, broken_views)) { |
| 1025 | params.num_levels = 1; | 897 | image_id = existing_image_id; |
| 1026 | params.emulated_levels = 1; | 898 | return true; |
| 1027 | params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM; | 899 | } |
| 1028 | params.type = VideoCore::Surface::SurfaceType::ColorTexture; | 900 | } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views)) { |
| 1029 | auto surface = CreateSurface(0ULL, params); | 901 | image_id = existing_image_id; |
| 1030 | invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); | 902 | return true; |
| 1031 | surface->UploadTexture(invalid_memory); | ||
| 1032 | surface->MarkAsModified(false, Tick()); | ||
| 1033 | invalid_cache.emplace(i_target, surface); | ||
| 1034 | return surface->GetMainView(); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | /** | ||
| 1038 | * Gets the source and destination starting addresses and parameters, | ||
| 1039 | * and tries to deduce whether they are supposed to be depth textures. If so, | ||
| 1040 | * their parameters are adjusted accordingly. | ||
| 1041 | * | ||
| 1042 | * @param src_params The parameters of the candidate surface. | ||
| 1043 | * @param dst_params The parameters of the destination surface. | ||
| 1044 | * @param src_gpu_addr The starting address of the candidate surface. | ||
| 1045 | * @param dst_gpu_addr The starting address of the destination surface. | ||
| 1046 | **/ | ||
| 1047 | void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, | ||
| 1048 | const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { | ||
| 1049 | auto deduced_src = DeduceSurface(src_gpu_addr, src_params); | ||
| 1050 | auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params); | ||
| 1051 | if (deduced_src.Failed() || deduced_dst.Failed()) { | ||
| 1052 | return; | ||
| 1053 | } | 903 | } |
| 904 | return false; | ||
| 905 | }; | ||
| 906 | ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); | ||
| 907 | return image_id; | ||
| 908 | } | ||
| 1054 | 909 | ||
| 1055 | const bool incomplete_src = deduced_src.Incomplete(); | 910 | template <class P> |
| 1056 | const bool incomplete_dst = deduced_dst.Incomplete(); | 911 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 912 | RelaxedOptions options) { | ||
| 913 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 914 | ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); | ||
| 915 | const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); | ||
| 916 | const Image& image = slot_images[image_id]; | ||
| 917 | // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different | ||
| 918 | const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); | ||
| 919 | if (is_new) { | ||
| 920 | it->second = slot_image_allocs.insert(); | ||
| 921 | } | ||
| 922 | slot_image_allocs[it->second].images.push_back(image_id); | ||
| 923 | return image_id; | ||
| 924 | } | ||
| 1057 | 925 | ||
| 1058 | if (incomplete_src && incomplete_dst) { | 926 | template <class P> |
| 927 | ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { | ||
| 928 | ImageInfo new_info = info; | ||
| 929 | const size_t size_bytes = CalculateGuestSizeInBytes(new_info); | ||
| 930 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | ||
| 931 | std::vector<ImageId> overlap_ids; | ||
| 932 | std::vector<ImageId> left_aliased_ids; | ||
| 933 | std::vector<ImageId> right_aliased_ids; | ||
| 934 | ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { | ||
| 935 | if (info.type != overlap.info.type) { | ||
| 1059 | return; | 936 | return; |
| 1060 | } | 937 | } |
| 1061 | 938 | if (info.type == ImageType::Linear) { | |
| 1062 | const bool any_incomplete = incomplete_src || incomplete_dst; | 939 | if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { |
| 1063 | 940 | // Alias linear images with the same pitch | |
| 1064 | if (!any_incomplete) { | 941 | left_aliased_ids.push_back(overlap_id); |
| 1065 | if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) { | ||
| 1066 | return; | ||
| 1067 | } | ||
| 1068 | } else { | ||
| 1069 | if (incomplete_src && !(deduced_dst.IsDepth())) { | ||
| 1070 | return; | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | if (incomplete_dst && !(deduced_src.IsDepth())) { | ||
| 1074 | return; | ||
| 1075 | } | 942 | } |
| 943 | return; | ||
| 1076 | } | 944 | } |
| 1077 | 945 | static constexpr bool strict_size = true; | |
| 1078 | const auto inherit_format = [](SurfaceParams& to, TSurface from) { | 946 | const std::optional<OverlapResult> solution = |
| 1079 | const SurfaceParams& params = from->GetSurfaceParams(); | 947 | ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views); |
| 1080 | to.pixel_format = params.pixel_format; | 948 | if (solution) { |
| 1081 | to.type = params.type; | 949 | gpu_addr = solution->gpu_addr; |
| 1082 | }; | 950 | cpu_addr = solution->cpu_addr; |
| 1083 | // Now we got the cases where one or both is Depth and the other is not known | 951 | new_info.resources = solution->resources; |
| 1084 | if (!incomplete_src) { | 952 | overlap_ids.push_back(overlap_id); |
| 1085 | inherit_format(src_params, deduced_src.surface); | 953 | return; |
| 1086 | } else { | ||
| 1087 | inherit_format(src_params, deduced_dst.surface); | ||
| 1088 | } | 954 | } |
| 1089 | if (!incomplete_dst) { | 955 | static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; |
| 1090 | inherit_format(dst_params, deduced_dst.surface); | 956 | const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); |
| 957 | if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views)) { | ||
| 958 | left_aliased_ids.push_back(overlap_id); | ||
| 959 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, | ||
| 960 | broken_views)) { | ||
| 961 | right_aliased_ids.push_back(overlap_id); | ||
| 962 | } | ||
| 963 | }); | ||
| 964 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); | ||
| 965 | Image& new_image = slot_images[new_image_id]; | ||
| 966 | |||
| 967 | // TODO: Only upload what we need | ||
| 968 | RefreshContents(new_image); | ||
| 969 | |||
| 970 | for (const ImageId overlap_id : overlap_ids) { | ||
| 971 | Image& overlap = slot_images[overlap_id]; | ||
| 972 | if (overlap.info.num_samples != new_image.info.num_samples) { | ||
| 973 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); | ||
| 1091 | } else { | 974 | } else { |
| 1092 | inherit_format(dst_params, deduced_src.surface); | 975 | const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); |
| 976 | const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); | ||
| 977 | runtime.CopyImage(new_image, overlap, copies); | ||
| 1093 | } | 978 | } |
| 979 | if (True(overlap.flags & ImageFlagBits::Tracked)) { | ||
| 980 | UntrackImage(overlap); | ||
| 981 | } | ||
| 982 | UnregisterImage(overlap_id); | ||
| 983 | DeleteImage(overlap_id); | ||
| 984 | } | ||
| 985 | ImageBase& new_image_base = new_image; | ||
| 986 | for (const ImageId aliased_id : right_aliased_ids) { | ||
| 987 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 988 | AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); | ||
| 989 | } | ||
| 990 | for (const ImageId aliased_id : left_aliased_ids) { | ||
| 991 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 992 | AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); | ||
| 1094 | } | 993 | } |
| 994 | RegisterImage(new_image_id); | ||
| 995 | return new_image_id; | ||
| 996 | } | ||
| 1095 | 997 | ||
| 1096 | std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, | 998 | template <class P> |
| 1097 | bool preserve_contents) { | 999 | typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( |
| 1098 | auto new_surface{GetUncachedSurface(gpu_addr, params)}; | 1000 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { |
| 1099 | Register(new_surface); | 1001 | static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; |
| 1100 | if (preserve_contents) { | 1002 | const GPUVAddr dst_addr = dst.Address(); |
| 1101 | LoadSurface(new_surface); | 1003 | const GPUVAddr src_addr = src.Address(); |
| 1102 | } | 1004 | ImageInfo dst_info(dst); |
| 1103 | return {new_surface, new_surface->GetMainView()}; | 1005 | ImageInfo src_info(src); |
| 1006 | ImageId dst_id; | ||
| 1007 | ImageId src_id; | ||
| 1008 | do { | ||
| 1009 | has_deleted_images = false; | ||
| 1010 | dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); | ||
| 1011 | src_id = FindImage(src_info, src_addr, FIND_OPTIONS); | ||
| 1012 | const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; | ||
| 1013 | const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; | ||
| 1014 | DeduceBlitImages(dst_info, src_info, dst_image, src_image); | ||
| 1015 | if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { | ||
| 1016 | continue; | ||
| 1017 | } | ||
| 1018 | if (!dst_id) { | ||
| 1019 | dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); | ||
| 1020 | } | ||
| 1021 | if (!src_id) { | ||
| 1022 | src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); | ||
| 1023 | } | ||
| 1024 | } while (has_deleted_images); | ||
| 1025 | return BlitImages{ | ||
| 1026 | .dst_id = dst_id, | ||
| 1027 | .src_id = src_id, | ||
| 1028 | .dst_format = dst_info.format, | ||
| 1029 | .src_format = src_info.format, | ||
| 1030 | }; | ||
| 1031 | } | ||
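The do/while above retries the whole lookup sequence because InsertImage can join and delete overlapping images, invalidating IDs found earlier in the same pass; has_deleted_images records that mutation. A toy model of the retry-until-stable idea over 1D address ranges (all names below are made up for illustration):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <utility>
    #include <vector>

    // Toy image cache over [begin, end) ranges. Inserting a range "joins" any
    // overlapping entries into one larger entry, deleting the old ones; that
    // deletion is what forces the caller to retry its lookups.
    struct ToyCache {
        std::vector<std::pair<int, int>> images;
        bool has_deleted_images = false;

        int Find(int begin, int end) const {
            for (std::size_t i = 0; i < images.size(); ++i) {
                if (images[i].first <= begin && end <= images[i].second) {
                    return static_cast<int>(i); // an existing image covers the range
                }
            }
            return -1;
        }

        int Insert(int begin, int end) {
            for (std::size_t i = 0; i < images.size();) {
                const auto [b, e] = images[i];
                if (b < end && begin < e) { // overlap: join and delete the old image
                    begin = std::min(begin, b);
                    end = std::max(end, e);
                    images.erase(images.begin() + static_cast<std::ptrdiff_t>(i));
                    has_deleted_images = true; // invalidates indices found earlier
                } else {
                    ++i;
                }
            }
            images.push_back({begin, end});
            return static_cast<int>(images.size() - 1);
        }
    };

    int main() {
        ToyCache cache;
        cache.images.push_back({0, 10});
        int dst = -1;
        int src = -1;
        do {
            cache.has_deleted_images = false;
            dst = cache.Find(0, 10);
            src = cache.Find(5, 15);
            if (dst < 0) {
                dst = cache.Insert(0, 10);
            }
            if (src < 0) {
                src = cache.Insert(5, 15); // joins with [0, 10), deleting it
            }
        } while (cache.has_deleted_images); // retry until a pass sees no deletions
        std::cout << "dst=" << dst << " src=" << src << '\n'; // both name [0, 15)
    }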
| 1032 | |||
| 1033 | template <class P> | ||
| 1034 | SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { | ||
| 1035 | if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { | ||
| 1036 | return NULL_SAMPLER_ID; | ||
| 1037 | } | ||
| 1038 | const auto [pair, is_new] = samplers.try_emplace(config); | ||
| 1039 | if (is_new) { | ||
| 1040 | pair->second = slot_samplers.insert(runtime, config); | ||
| 1104 | } | 1041 | } |
| 1042 | return pair->second; | ||
| 1043 | } | ||
| 1105 | 1044 | ||
| 1106 | void LoadSurface(const TSurface& surface) { | 1045 | template <class P> |
| 1107 | staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); | 1046 | ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { |
| 1108 | surface->LoadBuffer(gpu_memory, staging_cache); | 1047 | const auto& regs = maxwell3d.regs; |
| 1109 | surface->UploadTexture(staging_cache.GetBuffer(0)); | 1048 | if (index >= regs.rt_control.count) { |
| 1110 | surface->MarkAsModified(false, Tick()); | 1049 | return ImageViewId{}; |
| 1050 | } | ||
| 1051 | const auto& rt = regs.rt[index]; | ||
| 1052 | const GPUVAddr gpu_addr = rt.Address(); | ||
| 1053 | if (gpu_addr == 0) { | ||
| 1054 | return ImageViewId{}; | ||
| 1055 | } | ||
| 1056 | if (rt.format == Tegra::RenderTargetFormat::NONE) { | ||
| 1057 | return ImageViewId{}; | ||
| 1111 | } | 1058 | } |
| 1059 | const ImageInfo info(regs, index); | ||
| 1060 | return FindRenderTargetView(info, gpu_addr, is_clear); | ||
| 1061 | } | ||
| 1112 | 1062 | ||
| 1113 | void FlushSurface(const TSurface& surface) { | 1063 | template <class P> |
| 1114 | if (!surface->IsModified()) { | 1064 | ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { |
| 1115 | return; | 1065 | const auto& regs = maxwell3d.regs; |
| 1116 | } | 1066 | if (!regs.zeta_enable) { |
| 1117 | staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); | 1067 | return ImageViewId{}; |
| 1118 | surface->DownloadTexture(staging_cache.GetBuffer(0)); | 1068 | } |
| 1119 | surface->FlushBuffer(gpu_memory, staging_cache); | 1069 | const GPUVAddr gpu_addr = regs.zeta.Address(); |
| 1120 | surface->MarkAsModified(false, Tick()); | 1070 | if (gpu_addr == 0) { |
| 1121 | } | 1071 | return ImageViewId{}; |
| 1122 | |||
| 1123 | void RegisterInnerCache(TSurface& surface) { | ||
| 1124 | const VAddr cpu_addr = surface->GetCpuAddr(); | ||
| 1125 | VAddr start = cpu_addr >> registry_page_bits; | ||
| 1126 | const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; | ||
| 1127 | l1_cache[cpu_addr] = surface; | ||
| 1128 | while (start <= end) { | ||
| 1129 | registry[start].push_back(surface); | ||
| 1130 | start++; | ||
| 1131 | } | ||
| 1132 | } | 1072 | } |
| 1073 | const ImageInfo info(regs); | ||
| 1074 | return FindRenderTargetView(info, gpu_addr, is_clear); | ||
| 1075 | } | ||
| 1133 | 1076 | ||
| 1134 | void UnregisterInnerCache(TSurface& surface) { | 1077 | template <class P> |
| 1135 | const VAddr cpu_addr = surface->GetCpuAddr(); | 1078 | ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1136 | VAddr start = cpu_addr >> registry_page_bits; | 1079 | bool is_clear) { |
| 1137 | const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; | 1080 | const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; |
| 1138 | l1_cache.erase(cpu_addr); | 1081 | const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); |
| 1139 | while (start <= end) { | 1082 | if (!image_id) { |
| 1140 | auto& reg{registry[start]}; | 1083 | return NULL_IMAGE_VIEW_ID; |
| 1141 | reg.erase(std::find(reg.begin(), reg.end(), surface)); | 1084 | } |
| 1142 | start++; | 1085 | Image& image = slot_images[image_id]; |
| 1143 | } | 1086 | const ImageViewType view_type = RenderTargetImageViewType(info); |
| 1087 | SubresourceBase base; | ||
| 1088 | if (image.info.type == ImageType::Linear) { | ||
| 1089 | base = SubresourceBase{.level = 0, .layer = 0}; | ||
| 1090 | } else { | ||
| 1091 | base = image.TryFindBase(gpu_addr).value(); | ||
| 1144 | } | 1092 | } |
| 1093 | const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; | ||
| 1094 | const SubresourceRange range{ | ||
| 1095 | .base = base, | ||
| 1096 | .extent = {.levels = 1, .layers = layers}, | ||
| 1097 | }; | ||
| 1098 | return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); | ||
| 1099 | } | ||
| 1145 | 1100 | ||
| 1146 | VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { | 1101 | template <class P> |
| 1147 | if (size == 0) { | 1102 | template <typename Func> |
| 1148 | return {}; | 1103 | void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { |
| 1104 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 1105 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 1106 | boost::container::small_vector<ImageId, 32> images; | ||
| 1107 | ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { | ||
| 1108 | const auto it = page_table.find(page); | ||
| 1109 | if (it == page_table.end()) { | ||
| 1110 | if constexpr (BOOL_BREAK) { | ||
| 1111 | return false; | ||
| 1112 | } else { | ||
| 1113 | return; | ||
| 1114 | } | ||
| 1149 | } | 1115 | } |
| 1150 | const VAddr cpu_addr_end = cpu_addr + size; | 1116 | for (const ImageId image_id : it->second) { |
| 1151 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; | 1117 | Image& image = slot_images[image_id]; |
| 1152 | VectorSurface surfaces; | 1118 | if (True(image.flags & ImageFlagBits::Picked)) { |
| 1153 | for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { | ||
| 1154 | const auto it = registry.find(start); | ||
| 1155 | if (it == registry.end()) { | ||
| 1156 | continue; | 1119 | continue; |
| 1157 | } | 1120 | } |
| 1158 | for (auto& surface : it->second) { | 1121 | if (!image.Overlaps(cpu_addr, size)) { |
| 1159 | if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { | 1122 | continue; |
| 1160 | continue; | 1123 | } |
| 1124 | image.flags |= ImageFlagBits::Picked; | ||
| 1125 | images.push_back(image_id); | ||
| 1126 | if constexpr (BOOL_BREAK) { | ||
| 1127 | if (func(image_id, image)) { | ||
| 1128 | return true; | ||
| 1161 | } | 1129 | } |
| 1162 | surface->MarkAsPicked(true); | 1130 | } else { |
| 1163 | surfaces.push_back(surface); | 1131 | func(image_id, image); |
| 1164 | } | 1132 | } |
| 1165 | } | 1133 | } |
| 1166 | for (auto& surface : surfaces) { | 1134 | if constexpr (BOOL_BREAK) { |
| 1167 | surface->MarkAsPicked(false); | 1135 | return false; |
| 1168 | } | 1136 | } |
| 1169 | return surfaces; | 1137 | }); |
| 1138 | for (const ImageId image_id : images) { | ||
| 1139 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 1170 | } | 1140 | } |
| 1141 | } | ||
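ForEachImageInRegion picks its traversal strategy from the callback's return type at compile time: a bool-returning functor may stop the walk early, while a void-returning one always visits every match. The dispatch in isolation, assuming nothing from the cache:

    #include <iostream>
    #include <type_traits>
    #include <vector>

    // Calls func on every element; if func returns bool, a true result stops
    // the walk early. The dispatch costs nothing at runtime.
    template <typename Func>
    void ForEach(const std::vector<int>& items, Func&& func) {
        using FuncReturn = std::invoke_result_t<Func, int>;
        static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
        for (const int item : items) {
            if constexpr (BOOL_BREAK) {
                if (func(item)) {
                    return; // caller asked to stop
                }
            } else {
                func(item);
            }
        }
    }

    int main() {
        const std::vector<int> v{1, 2, 3, 4};
        ForEach(v, [](int i) { std::cout << i << ' '; });                // visits all
        std::cout << '\n';
        ForEach(v, [](int i) { std::cout << i << ' '; return i == 2; }); // stops at 2
        std::cout << '\n';
    }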
| 1171 | 1142 | ||
| 1172 | void ReserveSurface(const SurfaceParams& params, TSurface surface) { | 1143 | template <class P> |
| 1173 | surface_reserve[params].push_back(std::move(surface)); | 1144 | ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { |
| 1145 | Image& image = slot_images[image_id]; | ||
| 1146 | if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { | ||
| 1147 | return image_view_id; | ||
| 1174 | } | 1148 | } |
| 1149 | const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); | ||
| 1150 | image.InsertView(info, image_view_id); | ||
| 1151 | return image_view_id; | ||
| 1152 | } | ||
| 1153 | |||
| 1154 | template <class P> | ||
| 1155 | void TextureCache<P>::RegisterImage(ImageId image_id) { | ||
| 1156 | ImageBase& image = slot_images[image_id]; | ||
| 1157 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), | ||
| 1158 | "Trying to register an already registered image"); | ||
| 1159 | image.flags |= ImageFlagBits::Registered; | ||
| 1160 | ForEachPage(image.cpu_addr, image.guest_size_bytes, | ||
| 1161 | [this, image_id](u64 page) { page_table[page].push_back(image_id); }); | ||
| 1162 | } | ||
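Registration hinges on a ForEachPage helper whose definition is not part of this hunk; it invokes a callback once per page overlapped by [cpu_addr, cpu_addr + size). A plausible sketch under the assumption of 20-bit (1 MiB) pages, matching the old registry granularity; treat the constant and signature as illustrative:

    #include <cstdint>
    #include <iostream>

    constexpr std::uint64_t PAGE_BITS = 20; // assumed 1 MiB pages

    // Invokes func(page) for every page index covered by [addr, addr + size)
    template <typename Func>
    void ForEachPage(std::uint64_t addr, std::uint64_t size, Func&& func) {
        const std::uint64_t page_end = (addr + size - 1) >> PAGE_BITS;
        for (std::uint64_t page = addr >> PAGE_BITS; page <= page_end; ++page) {
            func(page);
        }
    }

    int main() {
        // A 3 MiB image starting half-way into a 1 MiB page spans four pages
        ForEachPage(0x80000, 3 << 20, [](std::uint64_t page) {
            std::cout << "page 0x" << std::hex << page << '\n';
        });
    }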
| 1175 | 1163 | ||
| 1176 | TSurface TryGetReservedSurface(const SurfaceParams& params) { | 1164 | template <class P> |
| 1177 | auto search{surface_reserve.find(params)}; | 1165 | void TextureCache<P>::UnregisterImage(ImageId image_id) { |
| 1178 | if (search == surface_reserve.end()) { | 1166 | Image& image = slot_images[image_id]; |
| 1179 | return {}; | 1167 | ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), |
| 1168 | "Trying to unregister an already registered image"); | ||
| 1169 | image.flags &= ~ImageFlagBits::Registered; | ||
| 1170 | ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { | ||
| 1171 | const auto page_it = page_table.find(page); | ||
| 1172 | if (page_it == page_table.end()) { | ||
| 1173 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1174 | return; | ||
| 1180 | } | 1175 | } |
| 1181 | for (auto& surface : search->second) { | 1176 | std::vector<ImageId>& image_ids = page_it->second; |
| 1182 | if (!surface->IsRegistered()) { | 1177 | const auto vector_it = std::ranges::find(image_ids, image_id); |
| 1183 | return surface; | 1178 | if (vector_it == image_ids.end()) { |
| 1184 | } | 1179 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS); |
| 1180 | return; | ||
| 1185 | } | 1181 | } |
| 1186 | return {}; | 1182 | image_ids.erase(vector_it); |
| 1187 | } | 1183 | }); |
| 1184 | } | ||
| 1188 | 1185 | ||
| 1189 | /// Try to do an image copy, logging when formats are incompatible. | 1186 | template <class P> |
| 1190 | void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) { | 1187 | void TextureCache<P>::TrackImage(ImageBase& image) { |
| 1191 | const SurfaceParams& src_params = src->GetSurfaceParams(); | 1188 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); |
| 1192 | const SurfaceParams& dst_params = dst->GetSurfaceParams(); | 1189 | image.flags |= ImageFlagBits::Tracked; |
| 1193 | if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { | 1190 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); |
| 1194 | LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", dst_params.pixel_format, | 1191 | } |
| 1195 | src_params.pixel_format); | 1192 | |
| 1196 | return; | 1193 | template <class P> |
| 1194 | void TextureCache<P>::UntrackImage(ImageBase& image) { | ||
| 1195 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); | ||
| 1196 | image.flags &= ~ImageFlagBits::Tracked; | ||
| 1197 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | template <class P> | ||
| 1201 | void TextureCache<P>::DeleteImage(ImageId image_id) { | ||
| 1202 | ImageBase& image = slot_images[image_id]; | ||
| 1203 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 1204 | const auto alloc_it = image_allocs_table.find(gpu_addr); | ||
| 1205 | if (alloc_it == image_allocs_table.end()) { | ||
| 1206 | UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", | ||
| 1207 | gpu_addr); | ||
| 1208 | return; | ||
| 1209 | } | ||
| 1210 | const ImageAllocId alloc_id = alloc_it->second; | ||
| 1211 | std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images; | ||
| 1212 | const auto alloc_image_it = std::ranges::find(alloc_images, image_id); | ||
| 1213 | if (alloc_image_it == alloc_images.end()) { | ||
| 1214 | UNREACHABLE_MSG("Trying to delete an image that does not exist"); | ||
| 1215 | return; | ||
| 1216 | } | ||
| 1217 | ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); | ||
| 1218 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); | ||
| 1219 | |||
| 1220 | // Mark render targets as dirty | ||
| 1221 | auto& dirty = maxwell3d.dirty.flags; | ||
| 1222 | dirty[Dirty::RenderTargets] = true; | ||
| 1223 | dirty[Dirty::ZetaBuffer] = true; | ||
| 1224 | for (size_t rt = 0; rt < NUM_RT; ++rt) { | ||
| 1225 | dirty[Dirty::ColorBuffer0 + rt] = true; | ||
| 1226 | } | ||
| 1227 | const std::span<const ImageViewId> image_view_ids = image.image_view_ids; | ||
| 1228 | for (const ImageViewId image_view_id : image_view_ids) { | ||
| 1229 | std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); | ||
| 1230 | if (render_targets.depth_buffer_id == image_view_id) { | ||
| 1231 | render_targets.depth_buffer_id = ImageViewId{}; | ||
| 1197 | } | 1232 | } |
| 1198 | ImageCopy(src, dst, copy); | ||
| 1199 | } | 1233 | } |
| 1234 | RemoveImageViewReferences(image_view_ids); | ||
| 1235 | RemoveFramebuffers(image_view_ids); | ||
| 1236 | |||
| 1237 | for (const AliasedImage& alias : image.aliased_images) { | ||
| 1238 | ImageBase& other_image = slot_images[alias.id]; | ||
| 1239 | [[maybe_unused]] const size_t num_removed_aliases = | ||
| 1240 | std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { | ||
| 1241 | return other_alias.id == image_id; | ||
| 1242 | }); | ||
| 1243 | ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", | ||
| 1244 | num_removed_aliases); | ||
| 1245 | } | ||
| 1246 | for (const ImageViewId image_view_id : image_view_ids) { | ||
| 1247 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); | ||
| 1248 | slot_image_views.erase(image_view_id); | ||
| 1249 | } | ||
| 1250 | sentenced_images.Push(std::move(slot_images[image_id])); | ||
| 1251 | slot_images.erase(image_id); | ||
| 1200 | 1252 | ||
| 1201 | constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { | 1253 | alloc_images.erase(alloc_image_it); |
| 1202 | return siblings_table[static_cast<std::size_t>(format)]; | 1254 | if (alloc_images.empty()) { |
| 1255 | image_allocs_table.erase(alloc_it); | ||
| 1203 | } | 1256 | } |
| 1257 | if constexpr (ENABLE_VALIDATION) { | ||
| 1258 | std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); | ||
| 1259 | std::ranges::fill(compute_image_view_ids, CORRUPT_ID); | ||
| 1260 | } | ||
| 1261 | graphics_image_table.Invalidate(); | ||
| 1262 | compute_image_table.Invalidate(); | ||
| 1263 | has_deleted_images = true; | ||
| 1264 | } | ||
| 1204 | 1265 | ||
| 1205 | /// Returns true when the shader sampler entry is compatible with the TIC texture type. | 1266 | template <class P> |
| 1206 | static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type, | 1267 | void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) { |
| 1207 | const VideoCommon::Shader::Sampler& entry) { | 1268 | auto it = image_views.begin(); |
| 1208 | const auto shader_type = entry.type; | 1269 | while (it != image_views.end()) { |
| 1209 | switch (tic_type) { | 1270 | const auto found = std::ranges::find(removed_views, it->second); |
| 1210 | case Tegra::Texture::TextureType::Texture1D: | 1271 | if (found != removed_views.end()) { |
| 1211 | case Tegra::Texture::TextureType::Texture1DArray: | 1272 | it = image_views.erase(it); |
| 1212 | return shader_type == Tegra::Shader::TextureType::Texture1D; | 1273 | } else { |
| 1213 | case Tegra::Texture::TextureType::Texture1DBuffer: | 1274 | ++it; |
| 1214 | // TODO(Rodrigo): Assume as valid for now | ||
| 1215 | return true; | ||
| 1216 | case Tegra::Texture::TextureType::Texture2D: | ||
| 1217 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | ||
| 1218 | return shader_type == Tegra::Shader::TextureType::Texture2D; | ||
| 1219 | case Tegra::Texture::TextureType::Texture2DArray: | ||
| 1220 | return shader_type == Tegra::Shader::TextureType::Texture2D || | ||
| 1221 | shader_type == Tegra::Shader::TextureType::TextureCube; | ||
| 1222 | case Tegra::Texture::TextureType::Texture3D: | ||
| 1223 | return shader_type == Tegra::Shader::TextureType::Texture3D; | ||
| 1224 | case Tegra::Texture::TextureType::TextureCubeArray: | ||
| 1225 | case Tegra::Texture::TextureType::TextureCubemap: | ||
| 1226 | if (shader_type == Tegra::Shader::TextureType::TextureCube) { | ||
| 1227 | return true; | ||
| 1228 | } | ||
| 1229 | return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array; | ||
| 1230 | } | 1275 | } |
| 1231 | UNREACHABLE(); | ||
| 1232 | return true; | ||
| 1233 | } | 1276 | } |
| 1277 | } | ||
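This function and RemoveFramebuffers below both erase from a map while iterating it, advancing through the iterator that erase() returns; incrementing a just-erased iterator would be undefined behavior. The idiom on its own:

    #include <iostream>
    #include <string>
    #include <unordered_map>

    int main() {
        std::unordered_map<std::string, int> table{{"keep", 1}, {"drop", 2}, {"also_drop", 3}};
        for (auto it = table.begin(); it != table.end();) {
            if (it->second != 1) {
                it = table.erase(it); // erase returns the next valid iterator
            } else {
                ++it; // only advance when nothing was erased
            }
        }
        for (const auto& [key, value] : table) {
            std::cout << key << " -> " << value << '\n';
        }
    }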
| 1234 | 1278 | ||
| 1235 | struct FramebufferTargetInfo { | 1279 | template <class P> |
| 1236 | TSurface target; | 1280 | void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) { |
| 1237 | TView view; | 1281 | auto it = framebuffers.begin(); |
| 1238 | }; | 1282 | while (it != framebuffers.end()) { |
| 1239 | 1283 | if (it->first.Contains(removed_views)) { | |
| 1240 | void AsyncFlushSurface(TSurface& surface) { | 1284 | it = framebuffers.erase(it); |
| 1241 | if (!uncommitted_flushes) { | 1285 | } else { |
| 1242 | uncommitted_flushes = std::make_shared<std::list<TSurface>>(); | 1286 | ++it; |
| 1243 | } | 1287 | } |
| 1244 | uncommitted_flushes->push_back(surface); | ||
| 1245 | } | 1288 | } |
| 1289 | } | ||
| 1246 | 1290 | ||
| 1247 | VideoCore::RasterizerInterface& rasterizer; | 1291 | template <class P> |
| 1248 | Tegra::Engines::Maxwell3D& maxwell3d; | 1292 | void TextureCache<P>::MarkModification(ImageBase& image) noexcept { |
| 1249 | Tegra::MemoryManager& gpu_memory; | 1293 | image.flags |= ImageFlagBits::GpuModified; |
| 1250 | 1294 | image.modification_tick = ++modification_tick; | |
| 1251 | FormatLookupTable format_lookup_table; | 1295 | } |
| 1252 | FormatCompatibility format_compatibility; | ||
| 1253 | |||
| 1254 | u64 ticks{}; | ||
| 1255 | |||
| 1256 | // Guards the cache for protection conflicts. | ||
| 1257 | bool guard_render_targets{}; | ||
| 1258 | bool guard_samplers{}; | ||
| 1259 | |||
| 1260 | // The siblings table is for formats that can be interchanged with one another | ||
| 1261 | // without causing issues. This is only valid when a conflict occurs on a | ||
| 1262 | // non-rendering use. | ||
| 1263 | std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table; | ||
| 1264 | |||
| 1265 | // The internal cache is different for the texture cache: it is based on buckets | ||
| 1266 | // of 1 MB. This fits the purpose of this cache better, as textures are normally | ||
| 1267 | // large in size. | ||
| 1268 | static constexpr u64 registry_page_bits{20}; | ||
| 1269 | static constexpr u64 registry_page_size{1 << registry_page_bits}; | ||
| 1270 | std::unordered_map<VAddr, std::vector<TSurface>> registry; | ||
| 1271 | 1296 | ||
| 1272 | static constexpr u32 DEPTH_RT = 8; | 1297 | template <class P> |
| 1273 | static constexpr u32 NO_RT = 0xFFFFFFFF; | 1298 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { |
| 1299 | boost::container::small_vector<const AliasedImage*, 1> aliased_images; | ||
| 1300 | ImageBase& image = slot_images[image_id]; | ||
| 1301 | u64 most_recent_tick = image.modification_tick; | ||
| 1302 | for (const AliasedImage& aliased : image.aliased_images) { | ||
| 1303 | ImageBase& aliased_image = slot_images[aliased.id]; | ||
| 1304 | if (image.modification_tick < aliased_image.modification_tick) { | ||
| 1305 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); | ||
| 1306 | aliased_images.push_back(&aliased); | ||
| 1307 | } | ||
| 1308 | } | ||
| 1309 | if (aliased_images.empty()) { | ||
| 1310 | return; | ||
| 1311 | } | ||
| 1312 | image.modification_tick = most_recent_tick; | ||
| 1313 | std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { | ||
| 1314 | const ImageBase& lhs_image = slot_images[lhs->id]; | ||
| 1315 | const ImageBase& rhs_image = slot_images[rhs->id]; | ||
| 1316 | return lhs_image.modification_tick < rhs_image.modification_tick; | ||
| 1317 | }); | ||
| 1318 | for (const AliasedImage* const aliased : aliased_images) { | ||
| 1319 | CopyImage(image_id, aliased->id, aliased->copies); | ||
| 1320 | } | ||
| 1321 | } | ||
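SynchronizeAliases gathers only the aliases modified after the image, then replays their copies in ascending modification-tick order so the newest data is written last. A toy rendition of that ordering; Alias and the integer payload are stand-ins, not cache types:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Toy alias sync: pending writes are applied oldest-first so the newest wins.
    struct Alias {
        std::uint64_t tick;
        int value;
    };

    int main() {
        std::uint64_t image_tick = 5;
        int image_value = 0;
        const std::vector<Alias> aliases{{9, 90}, {3, 30}, {7, 70}};

        std::vector<const Alias*> newer;
        for (const Alias& alias : aliases) {
            if (alias.tick > image_tick) {
                newer.push_back(&alias); // only aliases modified after the image
            }
        }
        std::sort(newer.begin(), newer.end(),
                  [](const Alias* lhs, const Alias* rhs) { return lhs->tick < rhs->tick; });
        for (const Alias* alias : newer) {
            image_value = alias->value;                     // stand-in for CopyImage
            image_tick = std::max(image_tick, alias->tick); // track the newest tick
        }
        std::cout << "value=" << image_value << " tick=" << image_tick << '\n'; // 90, 9
    }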
| 1274 | 1322 | ||
| 1275 | // The L1 cache is used for fast texture lookup before checking the overlaps. | 1323 | template <class P> |
| 1276 | // This avoids calculating the size and other properties. | 1324 | void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { |
| 1277 | std::unordered_map<VAddr, TSurface> l1_cache; | 1325 | Image& image = slot_images[image_id]; |
| 1326 | if (invalidate) { | ||
| 1327 | image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); | ||
| 1328 | if (False(image.flags & ImageFlagBits::Tracked)) { | ||
| 1329 | TrackImage(image); | ||
| 1330 | } | ||
| 1331 | } else { | ||
| 1332 | RefreshContents(image); | ||
| 1333 | SynchronizeAliases(image_id); | ||
| 1334 | } | ||
| 1335 | if (is_modification) { | ||
| 1336 | MarkModification(image); | ||
| 1337 | } | ||
| 1338 | image.frame_tick = frame_tick; | ||
| 1339 | } | ||
| 1278 | 1340 | ||
| 1279 | /// The surface reserve is a "backup" cache; this is where we put unique surfaces that have | 1341 | template <class P> |
| 1280 | /// previously been used. This is to prevent surfaces from being constantly created and | 1342 | void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification, |
| 1281 | /// destroyed when used with different surface parameters. | 1343 | bool invalidate) { |
| 1282 | std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve; | 1344 | if (!image_view_id) { |
| 1283 | std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> | 1345 | return; |
| 1284 | render_targets; | 1346 | } |
| 1285 | FramebufferTargetInfo depth_buffer; | 1347 | const ImageViewBase& image_view = slot_image_views[image_view_id]; |
| 1348 | PrepareImage(image_view.image_id, is_modification, invalidate); | ||
| 1349 | } | ||
| 1286 | 1350 | ||
| 1287 | std::vector<TSurface> sampled_textures; | 1351 | template <class P> |
| 1352 | void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) { | ||
| 1353 | Image& dst = slot_images[dst_id]; | ||
| 1354 | Image& src = slot_images[src_id]; | ||
| 1355 | const auto dst_format_type = GetFormatType(dst.info.format); | ||
| 1356 | const auto src_format_type = GetFormatType(src.info.format); | ||
| 1357 | if (src_format_type == dst_format_type) { | ||
| 1358 | if constexpr (HAS_EMULATED_COPIES) { | ||
| 1359 | if (!runtime.CanImageBeCopied(dst, src)) { | ||
| 1360 | return runtime.EmulateCopyImage(dst, src, copies); | ||
| 1361 | } | ||
| 1362 | } | ||
| 1363 | return runtime.CopyImage(dst, src, copies); | ||
| 1364 | } | ||
| 1365 | UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); | ||
| 1366 | UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); | ||
| 1367 | for (const ImageCopy& copy : copies) { | ||
| 1368 | UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); | ||
| 1369 | UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); | ||
| 1370 | UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); | ||
| 1371 | UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); | ||
| 1372 | |||
| 1373 | const SubresourceBase dst_base{ | ||
| 1374 | .level = copy.dst_subresource.base_level, | ||
| 1375 | .layer = copy.dst_subresource.base_layer, | ||
| 1376 | }; | ||
| 1377 | const SubresourceBase src_base{ | ||
| 1378 | .level = copy.src_subresource.base_level, | ||
| 1379 | .layer = copy.src_subresource.base_layer, | ||
| 1380 | }; | ||
| 1381 | const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; | ||
| 1382 | const SubresourceExtent src_extent{.levels = 1, .layers = 1}; | ||
| 1383 | const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; | ||
| 1384 | const SubresourceRange src_range{.base = src_base, .extent = src_extent}; | ||
| 1385 | const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); | ||
| 1386 | const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); | ||
| 1387 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | ||
| 1388 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; | ||
| 1389 | const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); | ||
| 1390 | ImageView& dst_view = slot_image_views[dst_view_id]; | ||
| 1391 | ImageView& src_view = slot_image_views[src_view_id]; | ||
| 1392 | [[maybe_unused]] const Extent3D expected_size{ | ||
| 1393 | .width = std::min(dst_view.size.width, src_view.size.width), | ||
| 1394 | .height = std::min(dst_view.size.height, src_view.size.height), | ||
| 1395 | .depth = std::min(dst_view.size.depth, src_view.size.depth), | ||
| 1396 | }; | ||
| 1397 | UNIMPLEMENTED_IF(copy.extent != expected_size); | ||
| 1288 | 1398 | ||
| 1289 | /// This cache stores null surfaces to be used as placeholders | 1399 | runtime.ConvertImage(dst_framebuffer, dst_view, src_view); |
| 1290 | /// for invalid texture calls. | 1400 | } |
| 1291 | std::unordered_map<u32, TSurface> invalid_cache; | 1401 | } |
| 1292 | std::vector<u8> invalid_memory; | ||
| 1293 | 1402 | ||
| 1294 | std::list<TSurface> marked_for_unregister; | 1403 | template <class P> |
| 1404 | void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { | ||
| 1405 | if (*old_id == new_id) { | ||
| 1406 | return; | ||
| 1407 | } | ||
| 1408 | if (*old_id) { | ||
| 1409 | const ImageViewBase& old_view = slot_image_views[*old_id]; | ||
| 1410 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { | ||
| 1411 | uncommitted_downloads.push_back(old_view.image_id); | ||
| 1412 | } | ||
| 1413 | } | ||
| 1414 | *old_id = new_id; | ||
| 1415 | } | ||
| 1295 | 1416 | ||
| 1296 | std::shared_ptr<std::list<TSurface>> uncommitted_flushes{}; | 1417 | template <class P> |
| 1297 | std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes; | 1418 | std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage( |
| 1419 | ImageId image_id, const ImageViewInfo& view_info) { | ||
| 1420 | const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); | ||
| 1421 | const ImageBase& image = slot_images[image_id]; | ||
| 1422 | const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; | ||
| 1423 | const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; | ||
| 1424 | const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; | ||
| 1425 | const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); | ||
| 1426 | const u32 num_samples = image.info.num_samples; | ||
| 1427 | const auto [samples_x, samples_y] = SamplesLog2(num_samples); | ||
| 1428 | const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ | ||
| 1429 | .color_buffer_ids = {color_view_id}, | ||
| 1430 | .depth_buffer_id = depth_view_id, | ||
| 1431 | .size = {extent.width >> samples_x, extent.height >> samples_y}, | ||
| 1432 | }); | ||
| 1433 | return {framebuffer_id, view_id}; | ||
| 1434 | } | ||
| 1298 | 1435 | ||
| 1299 | StagingCache staging_cache; | 1436 | template <class P> |
| 1300 | std::recursive_mutex mutex; | 1437 | bool TextureCache<P>::IsFullClear(ImageViewId id) { |
| 1301 | }; | 1438 | if (!id) { |
| 1439 | return true; | ||
| 1440 | } | ||
| 1441 | const ImageViewBase& image_view = slot_image_views[id]; | ||
| 1442 | const ImageBase& image = slot_images[image_view.image_id]; | ||
| 1443 | const Extent3D size = image_view.size; | ||
| 1444 | const auto& regs = maxwell3d.regs; | ||
| 1445 | const auto& scissor = regs.scissor_test[0]; | ||
| 1446 | if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { | ||
| 1447 | // Images with multiple resources can't be cleared in a single call | ||
| 1448 | return false; | ||
| 1449 | } | ||
| 1450 | if (regs.clear_flags.scissor == 0) { | ||
| 1451 | // If scissor testing is disabled, the clear is always full | ||
| 1452 | return true; | ||
| 1453 | } | ||
| 1454 | // Make sure the clear covers all texels in the subresource | ||
| 1455 | return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && | ||
| 1456 | scissor.max_y >= size.height; | ||
| 1457 | } | ||
| 1302 | 1458 | ||
| 1303 | } // namespace VideoCommon | 1459 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h new file mode 100644 index 000000000..2ad2d72a6 --- /dev/null +++ b/src/video_core/texture_cache/types.h | |||
| @@ -0,0 +1,140 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_funcs.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/texture_cache/slot_vector.h" | ||
| 10 | |||
| 11 | namespace VideoCommon { | ||
| 12 | |||
| 13 | constexpr size_t NUM_RT = 8; | ||
| 14 | constexpr size_t MAX_MIP_LEVELS = 14; | ||
| 15 | |||
| 16 | constexpr SlotId CORRUPT_ID{0xfffffffe}; | ||
| 17 | |||
| 18 | using ImageId = SlotId; | ||
| 19 | using ImageViewId = SlotId; | ||
| 20 | using ImageAllocId = SlotId; | ||
| 21 | using SamplerId = SlotId; | ||
| 22 | using FramebufferId = SlotId; | ||
| 23 | |||
| 24 | enum class ImageType : u32 { | ||
| 25 | e1D, | ||
| 26 | e2D, | ||
| 27 | e3D, | ||
| 28 | Linear, | ||
| 29 | Buffer, | ||
| 30 | }; | ||
| 31 | |||
| 32 | enum class ImageViewType : u32 { | ||
| 33 | e1D, | ||
| 34 | e2D, | ||
| 35 | Cube, | ||
| 36 | e3D, | ||
| 37 | e1DArray, | ||
| 38 | e2DArray, | ||
| 39 | CubeArray, | ||
| 40 | Rect, | ||
| 41 | Buffer, | ||
| 42 | }; | ||
| 43 | constexpr size_t NUM_IMAGE_VIEW_TYPES = 9; | ||
| 44 | |||
| 45 | enum class RelaxedOptions : u32 { | ||
| 46 | Size = 1 << 0, | ||
| 47 | Format = 1 << 1, | ||
| 48 | Samples = 1 << 2, | ||
| 49 | }; | ||
| 50 | DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) | ||
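DECLARE_ENUM_FLAG_OPERATORS comes from common/common_funcs.h and generates the bitwise operators that make enum-class flags composable; the True()/False() helpers used throughout the cache then test masked values readably. An approximation of what the macro provides for this enum (the exact expansion may differ):

    #include <iostream>

    enum class RelaxedOptions : unsigned {
        Size = 1u << 0,
        Format = 1u << 1,
        Samples = 1u << 2,
    };

    // Approximation of what DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) provides
    constexpr RelaxedOptions operator|(RelaxedOptions a, RelaxedOptions b) {
        return static_cast<RelaxedOptions>(static_cast<unsigned>(a) | static_cast<unsigned>(b));
    }
    constexpr RelaxedOptions operator&(RelaxedOptions a, RelaxedOptions b) {
        return static_cast<RelaxedOptions>(static_cast<unsigned>(a) & static_cast<unsigned>(b));
    }

    // True()/False() turn a masked flag value into a readable boolean test
    constexpr bool True(RelaxedOptions f) {
        return static_cast<unsigned>(f) != 0;
    }
    constexpr bool False(RelaxedOptions f) {
        return static_cast<unsigned>(f) == 0;
    }

    int main() {
        constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
        std::cout << True(options & RelaxedOptions::Size) << '\n';     // 1
        std::cout << False(options & RelaxedOptions::Samples) << '\n'; // 1
    }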
| 51 | |||
| 52 | struct Offset2D { | ||
| 53 | constexpr auto operator<=>(const Offset2D&) const noexcept = default; | ||
| 54 | |||
| 55 | s32 x; | ||
| 56 | s32 y; | ||
| 57 | }; | ||
| 58 | |||
| 59 | struct Offset3D { | ||
| 60 | constexpr auto operator<=>(const Offset3D&) const noexcept = default; | ||
| 61 | |||
| 62 | s32 x; | ||
| 63 | s32 y; | ||
| 64 | s32 z; | ||
| 65 | }; | ||
| 66 | |||
| 67 | struct Extent2D { | ||
| 68 | constexpr auto operator<=>(const Extent2D&) const noexcept = default; | ||
| 69 | |||
| 70 | u32 width; | ||
| 71 | u32 height; | ||
| 72 | }; | ||
| 73 | |||
| 74 | struct Extent3D { | ||
| 75 | constexpr auto operator<=>(const Extent3D&) const noexcept = default; | ||
| 76 | |||
| 77 | u32 width; | ||
| 78 | u32 height; | ||
| 79 | u32 depth; | ||
| 80 | }; | ||
| 81 | |||
| 82 | struct SubresourceLayers { | ||
| 83 | s32 base_level = 0; | ||
| 84 | s32 base_layer = 0; | ||
| 85 | s32 num_layers = 1; | ||
| 86 | }; | ||
| 87 | |||
| 88 | struct SubresourceBase { | ||
| 89 | constexpr auto operator<=>(const SubresourceBase&) const noexcept = default; | ||
| 90 | |||
| 91 | s32 level = 0; | ||
| 92 | s32 layer = 0; | ||
| 93 | }; | ||
| 94 | |||
| 95 | struct SubresourceExtent { | ||
| 96 | constexpr auto operator<=>(const SubresourceExtent&) const noexcept = default; | ||
| 97 | |||
| 98 | s32 levels = 1; | ||
| 99 | s32 layers = 1; | ||
| 100 | }; | ||
| 101 | |||
| 102 | struct SubresourceRange { | ||
| 103 | constexpr auto operator<=>(const SubresourceRange&) const noexcept = default; | ||
| 104 | |||
| 105 | SubresourceBase base; | ||
| 106 | SubresourceExtent extent; | ||
| 107 | }; | ||
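Defaulting operator<=> makes the compiler derive memberwise comparison, and in C++20 it also implies a defaulted operator==; that is why these types can be compared with != (as the cache does against Offset3D{}) without hand-written operators. A quick demonstration outside the header:

    #include <compare>
    #include <iostream>

    struct Extent3D {
        constexpr auto operator<=>(const Extent3D&) const noexcept = default;

        unsigned width;
        unsigned height;
        unsigned depth;
    };

    int main() {
        constexpr Extent3D a{16, 16, 1};
        constexpr Extent3D b{16, 32, 1};
        // The defaulted spaceship derives ==, !=, <, <=, >, >= memberwise,
        // comparing width first, then height, then depth.
        std::cout << (a == b) << ' ' << (a != b) << ' ' << (a < b) << '\n'; // 0 1 1
    }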
| 108 | |||
| 109 | struct ImageCopy { | ||
| 110 | SubresourceLayers src_subresource; | ||
| 111 | SubresourceLayers dst_subresource; | ||
| 112 | Offset3D src_offset; | ||
| 113 | Offset3D dst_offset; | ||
| 114 | Extent3D extent; | ||
| 115 | }; | ||
| 116 | |||
| 117 | struct BufferImageCopy { | ||
| 118 | size_t buffer_offset; | ||
| 119 | size_t buffer_size; | ||
| 120 | u32 buffer_row_length; | ||
| 121 | u32 buffer_image_height; | ||
| 122 | SubresourceLayers image_subresource; | ||
| 123 | Offset3D image_offset; | ||
| 124 | Extent3D image_extent; | ||
| 125 | }; | ||
| 126 | |||
| 127 | struct BufferCopy { | ||
| 128 | size_t src_offset; | ||
| 129 | size_t dst_offset; | ||
| 130 | size_t size; | ||
| 131 | }; | ||
| 132 | |||
| 133 | struct SwizzleParameters { | ||
| 134 | Extent3D num_tiles; | ||
| 135 | Extent3D block; | ||
| 136 | size_t buffer_offset; | ||
| 137 | s32 level; | ||
| 138 | }; | ||
| 139 | |||
| 140 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp new file mode 100644 index 000000000..279932778 --- /dev/null +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -0,0 +1,1233 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | // This file contains code from Ryujinx | ||
| 6 | // A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx | ||
| 7 | // The sections using code from Ryujinx are marked with a link to the original version | ||
| 8 | |||
| 9 | // MIT License | ||
| 10 | // | ||
| 11 | // Copyright (c) Ryujinx Team and Contributors | ||
| 12 | // | ||
| 13 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and | ||
| 14 | // associated documentation files (the "Software"), to deal in the Software without restriction, | ||
| 15 | // including without limitation the rights to use, copy, modify, merge, publish, distribute, | ||
| 16 | // sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is | ||
| 17 | // furnished to do so, subject to the following conditions: | ||
| 18 | // | ||
| 19 | // The above copyright notice and this permission notice shall be included in all copies or | ||
| 20 | // substantial portions of the Software. | ||
| 21 | // | ||
| 22 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT | ||
| 23 | // NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| 24 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||
| 25 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 26 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| 27 | // | ||
| 28 | |||
| 29 | #include <algorithm> | ||
| 30 | #include <array> | ||
| 31 | #include <numeric> | ||
| 32 | #include <optional> | ||
| 33 | #include <span> | ||
| 34 | #include <vector> | ||
| 35 | |||
| 36 | #include "common/alignment.h" | ||
| 37 | #include "common/assert.h" | ||
| 38 | #include "common/bit_util.h" | ||
| 39 | #include "common/common_types.h" | ||
| 40 | #include "common/div_ceil.h" | ||
| 41 | #include "video_core/compatible_formats.h" | ||
| 42 | #include "video_core/engines/maxwell_3d.h" | ||
| 43 | #include "video_core/memory_manager.h" | ||
| 44 | #include "video_core/surface.h" | ||
| 45 | #include "video_core/texture_cache/decode_bc4.h" | ||
| 46 | #include "video_core/texture_cache/format_lookup_table.h" | ||
| 47 | #include "video_core/texture_cache/formatter.h" | ||
| 48 | #include "video_core/texture_cache/samples_helper.h" | ||
| 49 | #include "video_core/texture_cache/util.h" | ||
| 50 | #include "video_core/textures/astc.h" | ||
| 51 | #include "video_core/textures/decoders.h" | ||
| 52 | |||
| 53 | namespace VideoCommon { | ||
| 54 | |||
| 55 | namespace { | ||
| 56 | |||
| 57 | using Tegra::Texture::GOB_SIZE; | ||
| 58 | using Tegra::Texture::GOB_SIZE_SHIFT; | ||
| 59 | using Tegra::Texture::GOB_SIZE_X; | ||
| 60 | using Tegra::Texture::GOB_SIZE_X_SHIFT; | ||
| 61 | using Tegra::Texture::GOB_SIZE_Y; | ||
| 62 | using Tegra::Texture::GOB_SIZE_Y_SHIFT; | ||
| 63 | using Tegra::Texture::GOB_SIZE_Z; | ||
| 64 | using Tegra::Texture::GOB_SIZE_Z_SHIFT; | ||
| 65 | using Tegra::Texture::MsaaMode; | ||
| 66 | using Tegra::Texture::SwizzleTexture; | ||
| 67 | using Tegra::Texture::TextureFormat; | ||
| 68 | using Tegra::Texture::TextureType; | ||
| 69 | using Tegra::Texture::TICEntry; | ||
| 70 | using Tegra::Texture::UnswizzleTexture; | ||
| 71 | using VideoCore::Surface::BytesPerBlock; | ||
| 72 | using VideoCore::Surface::DefaultBlockHeight; | ||
| 73 | using VideoCore::Surface::DefaultBlockWidth; | ||
| 74 | using VideoCore::Surface::IsCopyCompatible; | ||
| 75 | using VideoCore::Surface::IsPixelFormatASTC; | ||
| 76 | using VideoCore::Surface::IsViewCompatible; | ||
| 77 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 78 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 79 | using VideoCore::Surface::SurfaceType; | ||
| 80 | |||
| 81 | constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); | ||
| 82 | |||
| 83 | struct LevelInfo { | ||
| 84 | Extent3D size; | ||
| 85 | Extent3D block; | ||
| 86 | Extent2D tile_size; | ||
| 87 | u32 bpp_log2; | ||
| 88 | u32 tile_width_spacing; | ||
| 89 | }; | ||
| 90 | |||
| 91 | [[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) { | ||
| 92 | if (shift == 0) { | ||
| 93 | return 0; | ||
| 94 | } | ||
| 95 | u32 x = unit_factor << (shift - 1); | ||
| 96 | if (x >= dimension) { | ||
| 97 | while (--shift) { | ||
| 98 | x >>= 1; | ||
| 99 | if (x < dimension) { | ||
| 100 | break; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | } | ||
| 104 | return shift; | ||
| 105 | } | ||
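AdjustTileSize lowers a block's log2 shift until the block no longer exceeds the dimension it tiles, which is how small mips end up with smaller block heights and depths than the base level. A few sample evaluations, reusing the body above with plain unsigned in place of u32:

    #include <iostream>

    using u32 = unsigned;

    constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) {
        if (shift == 0) {
            return 0;
        }
        u32 x = unit_factor << (shift - 1);
        if (x >= dimension) {
            while (--shift) {
                x >>= 1;
                if (x < dimension) {
                    break;
                }
            }
        }
        return shift;
    }

    int main() {
        // unit_factor = 8 here stands in for GOB_SIZE_Y (8 rows per GOB)
        std::cout << AdjustTileSize(4, 8, 512) << '\n'; // 4: 64 rows already fit in 512
        std::cout << AdjustTileSize(4, 8, 32) << '\n';  // 2: halved twice until 16 < 32
        std::cout << AdjustTileSize(4, 8, 4) << '\n';   // 0: block collapses for a tiny dim
    }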
| 106 | |||
| 107 | [[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) { | ||
| 108 | return std::max<u32>(size >> level, 1); | ||
| 109 | } | ||
| 110 | |||
| 111 | [[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) { | ||
| 112 | return Extent3D{ | ||
| 113 | .width = AdjustMipSize(size.width, level), | ||
| 114 | .height = AdjustMipSize(size.height, level), | ||
| 115 | .depth = AdjustMipSize(size.depth, level), | ||
| 116 | }; | ||
| 117 | } | ||
| 118 | |||
| 119 | [[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples) { | ||
| 120 | const auto [samples_x, samples_y] = SamplesLog2(num_samples); | ||
| 121 | return Extent3D{ | ||
| 122 | .width = size.width >> samples_x, | ||
| 123 | .height = size.height >> samples_y, | ||
| 124 | .depth = size.depth, | ||
| 125 | }; | ||
| 126 | } | ||
| 127 | |||
| 128 | template <u32 GOB_EXTENT> | ||
| 129 | [[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) { | ||
| 130 | do { | ||
| 131 | while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) { | ||
| 132 | --block_size; | ||
| 133 | } | ||
| 134 | } while (level--); | ||
| 135 | return block_size; | ||
| 136 | } | ||
| 137 | |||
| 138 | [[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size, | ||
| 139 | u32 level) { | ||
| 140 | return { | ||
| 141 | .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level), | ||
| 142 | .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level), | ||
| 143 | .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level), | ||
| 144 | }; | ||
| 145 | } | ||
| 146 | |||
| 147 | [[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) { | ||
| 148 | return { | ||
| 149 | .width = Common::DivCeil(size.width, tile_size.width), | ||
| 150 | .height = Common::DivCeil(size.height, tile_size.height), | ||
| 151 | .depth = size.depth, | ||
| 152 | }; | ||
| 153 | } | ||
| 154 | |||
| 155 | [[nodiscard]] constexpr u32 BytesPerBlockLog2(u32 bytes_per_block) { | ||
| 156 | return std::countl_zero(bytes_per_block) ^ 0x1F; | ||
| 157 | } | ||
| 158 | |||
| 159 | [[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) { | ||
| 160 | return BytesPerBlockLog2(BytesPerBlock(format)); | ||
| 161 | } | ||
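countl_zero(x) ^ 0x1F is a branch-free floor(log2(x)) for nonzero 32-bit x: the leading-zero count lies in [0, 31], so XOR with 31 equals 31 minus the count. A short check:

    #include <bit>
    #include <cstdint>
    #include <iostream>

    constexpr std::uint32_t BytesPerBlockLog2(std::uint32_t bytes_per_block) {
        return std::countl_zero(bytes_per_block) ^ 0x1F;
    }

    int main() {
        // For powers of two this is the exact bit index: 1->0, 2->1, 4->2, 8->3, 16->4
        for (const std::uint32_t bpb : {1u, 2u, 4u, 8u, 16u}) {
            std::cout << bpb << " -> " << BytesPerBlockLog2(bpb) << '\n';
        }
        static_assert(BytesPerBlockLog2(8) == 3);
    }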
| 162 | |||
| 163 | [[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) { | ||
| 164 | const Extent3D num_blocks = AdjustTileSize(size, tile_size); | ||
| 165 | return num_blocks.width * num_blocks.height * num_blocks.depth; | ||
| 166 | } | ||
| 167 | |||
| 168 | [[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) { | ||
| 169 | return Common::DivCeil(AdjustMipSize(size, level), block_size); | ||
| 170 | } | ||
| 171 | |||
| 172 | [[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) { | ||
| 173 | return config.Width() * config.Height() * BytesPerBlock(format); | ||
| 174 | } | ||
| 175 | |||
| 176 | [[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) { | ||
| 177 | switch (type) { | ||
| 178 | case TextureType::Texture2D: | ||
| 179 | case TextureType::Texture2DArray: | ||
| 180 | case TextureType::Texture2DNoMipmap: | ||
| 181 | case TextureType::Texture3D: | ||
| 182 | case TextureType::TextureCubeArray: | ||
| 183 | case TextureType::TextureCubemap: | ||
| 184 | return true; | ||
| 185 | case TextureType::Texture1D: | ||
| 186 | case TextureType::Texture1DArray: | ||
| 187 | case TextureType::Texture1DBuffer: | ||
| 188 | return false; | ||
| 189 | } | ||
| 190 | return false; | ||
| 191 | } | ||
| 192 | |||
| 193 | [[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) { | ||
| 194 | switch (type) { | ||
| 195 | case ImageType::e2D: | ||
| 196 | case ImageType::e3D: | ||
| 197 | case ImageType::Linear: | ||
| 198 | return true; | ||
| 199 | case ImageType::e1D: | ||
| 200 | case ImageType::Buffer: | ||
| 201 | return false; | ||
| 202 | } | ||
| 203 | UNREACHABLE_MSG("Invalid image type={}", static_cast<int>(type)); | ||
| 204 | } | ||
| 205 | |||
| 206 | [[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) { | ||
| 207 | switch (num_samples) { | ||
| 208 | case 1: | ||
| 209 | return {1, 1}; | ||
| 210 | case 2: | ||
| 211 | return {2, 1}; | ||
| 212 | case 4: | ||
| 213 | return {2, 2}; | ||
| 214 | case 8: | ||
| 215 | return {4, 2}; | ||
| 216 | case 16: | ||
| 217 | return {4, 4}; | ||
| 218 | } | ||
| 219 | UNREACHABLE_MSG("Invalid number of samples={}", num_samples); | ||
| 220 | return {1, 1}; | ||
| 221 | } | ||
| 222 | |||
| 223 | [[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) { | ||
| 224 | return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; | ||
| 225 | } | ||
| 226 | |||
| 227 | [[nodiscard]] constexpr Extent3D NumLevelBlocks(const LevelInfo& info, u32 level) { | ||
| 228 | return Extent3D{ | ||
| 229 | .width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2, | ||
| 230 | .height = AdjustSize(info.size.height, level, info.tile_size.height), | ||
| 231 | .depth = AdjustMipSize(info.size.depth, level), | ||
| 232 | }; | ||
| 233 | } | ||
| 234 | |||
| 235 | [[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) { | ||
| 236 | const Extent3D blocks = NumLevelBlocks(info, level); | ||
| 237 | return Extent3D{ | ||
| 238 | .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width), | ||
| 239 | .height = AdjustTileSize(info.block.height, GOB_SIZE_Y, blocks.height), | ||
| 240 | .depth = AdjustTileSize(info.block.depth, GOB_SIZE_Z, blocks.depth), | ||
| 241 | }; | ||
| 242 | } | ||
| 243 | |||
| 244 | [[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) { | ||
| 245 | return Extent2D{ | ||
| 246 | .width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing, | ||
| 247 | .height = GOB_SIZE_Y_SHIFT + block_height, | ||
| 248 | }; | ||
| 249 | } | ||
| 250 | |||
| 251 | [[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob, | ||
| 252 | u32 block_depth) { | ||
| 253 | return num_tiles.width <= (1U << gob.width) || num_tiles.height <= (1U << gob.height) || | ||
| 254 | num_tiles.depth < (1U << block_depth); | ||
| 255 | } | ||
| 256 | |||
| 257 | [[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob, | ||
| 258 | u32 bpp_log2) { | ||
| 259 | if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) { | ||
| 260 | return GOB_SIZE_X_SHIFT - bpp_log2; | ||
| 261 | } else { | ||
| 262 | return gob.width; | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | [[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2, | ||
| 267 | u32 tile_width_spacing) { | ||
| 268 | const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing); | ||
| 269 | return StrideAlignment(num_tiles, block, gob, bpp_log2); | ||
| 270 | } | ||
| 271 | |||
| 272 | [[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) { | ||
| 273 | const Extent3D blocks = NumLevelBlocks(info, level); | ||
| 274 | const Extent2D gobs{ | ||
| 275 | .width = Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT), | ||
| 276 | .height = Common::DivCeilLog2(blocks.height, GOB_SIZE_Y_SHIFT), | ||
| 277 | }; | ||
| 278 | const Extent2D gob = GobSize(info.bpp_log2, info.block.height, info.tile_width_spacing); | ||
| 279 | const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth); | ||
| 280 | const u32 alignment = is_small ? 0 : info.tile_width_spacing; | ||
| 281 | return Extent2D{ | ||
| 282 | .width = Common::AlignBits(gobs.width, alignment), | ||
| 283 | .height = gobs.height, | ||
| 284 | }; | ||
| 285 | } | ||
| 286 | |||
| 287 | [[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) { | ||
| 288 | const Extent3D blocks = NumLevelBlocks(info, level); | ||
| 289 | const Extent3D tile_shift = TileShift(info, level); | ||
| 290 | const Extent2D gobs = NumGobs(info, level); | ||
| 291 | return Extent3D{ | ||
| 292 | .width = Common::DivCeilLog2(gobs.width, tile_shift.width), | ||
| 293 | .height = Common::DivCeilLog2(gobs.height, tile_shift.height), | ||
| 294 | .depth = Common::DivCeilLog2(blocks.depth, tile_shift.depth), | ||
| 295 | }; | ||
| 296 | } | ||
| 297 | |||
| 298 | [[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) { | ||
| 299 | const Extent3D tile_shift = TileShift(info, level); | ||
| 300 | const Extent3D tiles = LevelTiles(info, level); | ||
| 301 | const u32 num_tiles = tiles.width * tiles.height * tiles.depth; | ||
| 302 | const u32 shift = GOB_SIZE_SHIFT + tile_shift.width + tile_shift.height + tile_shift.depth; | ||
| 303 | return num_tiles << shift; | ||
| 304 | } | ||
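| | // Note: a level's byte size is its tile count shifted by the size of one | ||
| | // tile, i.e. a 512-byte GOB (GOB_SIZE_SHIFT) scaled by the block dimensions. | ||
| | // The static_asserts at the end of this file pin these values down, e.g. | ||
| | // 0x7f8000 bytes for a 1920x1080 level with bpp_log2 = 2 and block height | ||
| | // log2 = 2. | ||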
| 305 | |||
| 306 | [[nodiscard]] constexpr std::array<u32, MAX_MIP_LEVELS> CalculateLevelSizes(const LevelInfo& info, | ||
| 307 | u32 num_levels) { | ||
| 308 | ASSERT(num_levels <= MAX_MIP_LEVELS); | ||
| 309 | std::array<u32, MAX_MIP_LEVELS> sizes{}; | ||
| 310 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 311 | sizes[level] = CalculateLevelSize(info, level); | ||
| 312 | } | ||
| 313 | return sizes; | ||
| 314 | } | ||
| 315 | |||
| 316 | [[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block, | ||
| 317 | u32 num_samples, u32 tile_width_spacing) { | ||
| 318 | const auto [samples_x, samples_y] = Samples(num_samples); | ||
| 319 | const u32 bytes_per_block = BytesPerBlock(format); | ||
| 320 | return { | ||
| 321 | .size = | ||
| 322 | { | ||
| 323 | .width = size.width * samples_x, | ||
| 324 | .height = size.height * samples_y, | ||
| 325 | .depth = size.depth, | ||
| 326 | }, | ||
| 327 | .block = block, | ||
| 328 | .tile_size = DefaultBlockSize(format), | ||
| 329 | .bpp_log2 = BytesPerBlockLog2(bytes_per_block), | ||
| 330 | .tile_width_spacing = tile_width_spacing, | ||
| 331 | }; | ||
| 332 | } | ||
| 333 | |||
| 334 | [[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) { | ||
| 335 | return MakeLevelInfo(info.format, info.size, info.block, info.num_samples, | ||
| 336 | info.tile_width_spacing); | ||
| 337 | } | ||
| 338 | |||
| 339 | [[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block, | ||
| 340 | u32 num_samples, u32 tile_width_spacing, | ||
| 341 | u32 level) { | ||
| 342 | const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing); | ||
| 343 | u32 offset = 0; | ||
| 344 | for (u32 current_level = 0; current_level < level; ++current_level) { | ||
| 345 | offset += CalculateLevelSize(info, current_level); | ||
| 346 | } | ||
| 347 | return offset; | ||
| 348 | } | ||
| 349 | |||
| 350 | [[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block, | ||
| 351 | u32 tile_size_y, u32 tile_width_spacing) { | ||
| 352 | // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134 | ||
| 353 | if (tile_width_spacing > 0) { | ||
| 354 | const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth; | ||
| 355 | return Common::AlignBits(size_bytes, alignment_log2); | ||
| 356 | } | ||
| 357 | const u32 aligned_height = Common::AlignUp(size.height, tile_size_y); | ||
| 358 | while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) { | ||
| 359 | --block.height; | ||
| 360 | } | ||
| 361 | while (block.depth != 0 && size.depth <= (1U << (block.depth - 1))) { | ||
| 362 | --block.depth; | ||
| 363 | } | ||
| 364 | const u32 block_shift = GOB_SIZE_SHIFT + block.height + block.depth; | ||
| 365 | const u32 num_blocks = size_bytes >> block_shift; | ||
| 366 | if (size_bytes != num_blocks << block_shift) { | ||
| 367 | return (num_blocks + 1) << block_shift; | ||
| 368 | } | ||
| 369 | return size_bytes; | ||
| 370 | } | ||
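| | // Note: with no tile width spacing, the block height/depth are first shrunk | ||
| | // while the mip still fits in half the block (mirroring the linked Ryujinx | ||
| | // SizeCalculator), then the layer size is rounded up to a whole block of GOBs. | ||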
| 371 | |||
| 372 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapEqualAddress(const ImageInfo& new_info, | ||
| 373 | const ImageBase& overlap, | ||
| 374 | bool strict_size) { | ||
| 375 | const ImageInfo& info = overlap.info; | ||
| 376 | if (!IsBlockLinearSizeCompatible(new_info, info, 0, 0, strict_size)) { | ||
| 377 | return std::nullopt; | ||
| 378 | } | ||
| 379 | if (new_info.block != info.block) { | ||
| 380 | return std::nullopt; | ||
| 381 | } | ||
| 382 | const SubresourceExtent resources = new_info.resources; | ||
| 383 | return SubresourceExtent{ | ||
| 384 | .levels = std::max(resources.levels, info.resources.levels), | ||
| 385 | .layers = std::max(resources.layers, info.resources.layers), | ||
| 386 | }; | ||
| 387 | } | ||
| 388 | |||
| 389 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( | ||
| 390 | const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { | ||
| 391 | const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); | ||
| 392 | const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); | ||
| 393 | const auto it = std::ranges::find(slice_offsets, diff); | ||
| 394 | if (it == slice_offsets.end()) { | ||
| 395 | return std::nullopt; | ||
| 396 | } | ||
| 397 | const std::vector subresources = CalculateSliceSubresources(new_info); | ||
| 398 | const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; | ||
| 399 | const ImageInfo& info = overlap.info; | ||
| 400 | if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { | ||
| 401 | return std::nullopt; | ||
| 402 | } | ||
| 403 | const u32 mip_depth = std::max(1U, new_info.size.depth >> base.level); | ||
| 404 | if (mip_depth < info.size.depth + base.layer) { | ||
| 405 | return std::nullopt; | ||
| 406 | } | ||
| 407 | if (MipBlockSize(new_info, base.level) != info.block) { | ||
| 408 | return std::nullopt; | ||
| 409 | } | ||
| 410 | return SubresourceExtent{ | ||
| 411 | .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), | ||
| 412 | .layers = 1, | ||
| 413 | }; | ||
| 414 | } | ||
| 415 | |||
| 416 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D( | ||
| 417 | const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { | ||
| 418 | const u32 layer_stride = new_info.layer_stride; | ||
| 419 | const s32 new_size = layer_stride * new_info.resources.layers; | ||
| 420 | const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr); | ||
| 421 | if (diff > new_size) { | ||
| 422 | return std::nullopt; | ||
| 423 | } | ||
| 424 | const s32 base_layer = diff / layer_stride; | ||
| 425 | const s32 mip_offset = diff % layer_stride; | ||
| 426 | const std::array offsets = CalculateMipLevelOffsets(new_info); | ||
| 427 | const auto end = offsets.begin() + new_info.resources.levels; | ||
| 428 | const auto it = std::find(offsets.begin(), end, mip_offset); | ||
| 429 | if (it == end) { | ||
| 430 | // Mipmap is not aligned to any valid size | ||
| 431 | return std::nullopt; | ||
| 432 | } | ||
| 433 | const SubresourceBase base{ | ||
| 434 | .level = static_cast<s32>(std::distance(offsets.begin(), it)), | ||
| 435 | .layer = base_layer, | ||
| 436 | }; | ||
| 437 | const ImageInfo& info = overlap.info; | ||
| 438 | if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { | ||
| 439 | return std::nullopt; | ||
| 440 | } | ||
| 441 | if (MipBlockSize(new_info, base.level) != info.block) { | ||
| 442 | return std::nullopt; | ||
| 443 | } | ||
| 444 | return SubresourceExtent{ | ||
| 445 | .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), | ||
| 446 | .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer), | ||
| 447 | }; | ||
| 448 | } | ||
| 449 | |||
| 450 | [[nodiscard]] std::optional<OverlapResult> ResolveOverlapRightAddress(const ImageInfo& new_info, | ||
| 451 | GPUVAddr gpu_addr, | ||
| 452 | VAddr cpu_addr, | ||
| 453 | const ImageBase& overlap, | ||
| 454 | bool strict_size) { | ||
| 455 | std::optional<SubresourceExtent> resources; | ||
| 456 | if (new_info.type != ImageType::e3D) { | ||
| 457 | resources = ResolveOverlapRightAddress2D(new_info, gpu_addr, overlap, strict_size); | ||
| 458 | } else { | ||
| 459 | resources = ResolveOverlapRightAddress3D(new_info, gpu_addr, overlap, strict_size); | ||
| 460 | } | ||
| 461 | if (!resources) { | ||
| 462 | return std::nullopt; | ||
| 463 | } | ||
| 464 | return OverlapResult{ | ||
| 465 | .gpu_addr = gpu_addr, | ||
| 466 | .cpu_addr = cpu_addr, | ||
| 467 | .resources = *resources, | ||
| 468 | }; | ||
| 469 | } | ||
| 470 | |||
| 471 | [[nodiscard]] std::optional<OverlapResult> ResolveOverlapLeftAddress(const ImageInfo& new_info, | ||
| 472 | GPUVAddr gpu_addr, | ||
| 473 | VAddr cpu_addr, | ||
| 474 | const ImageBase& overlap, | ||
| 475 | bool strict_size) { | ||
| 476 | const std::optional<SubresourceBase> base = overlap.TryFindBase(gpu_addr); | ||
| 477 | if (!base) { | ||
| 478 | return std::nullopt; | ||
| 479 | } | ||
| 480 | const ImageInfo& info = overlap.info; | ||
| 481 | if (!IsBlockLinearSizeCompatible(new_info, info, base->level, 0, strict_size)) { | ||
| 482 | return std::nullopt; | ||
| 483 | } | ||
| 484 | if (new_info.block != MipBlockSize(info, base->level)) { | ||
| 485 | return std::nullopt; | ||
| 486 | } | ||
| 487 | const SubresourceExtent resources = new_info.resources; | ||
| 488 | s32 layers = 1; | ||
| 489 | if (info.type != ImageType::e3D) { | ||
| 490 | layers = std::max(resources.layers, info.resources.layers + base->layer); | ||
| 491 | } | ||
| 492 | return OverlapResult{ | ||
| 493 | .gpu_addr = overlap.gpu_addr, | ||
| 494 | .cpu_addr = overlap.cpu_addr, | ||
| 495 | .resources = | ||
| 496 | { | ||
| 497 | .levels = std::max(resources.levels + base->level, info.resources.levels), | ||
| 498 | .layers = layers, | ||
| 499 | }, | ||
| 500 | }; | ||
| 501 | } | ||
| 502 | |||
| 503 | [[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info) { | ||
| 504 | // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L212 | ||
| 505 | static constexpr u32 STRIDE_ALIGNMENT = 32; | ||
| 506 | ASSERT(info.type == ImageType::Linear); | ||
| 507 | const Extent2D num_tiles{ | ||
| 508 | .width = Common::DivCeil(info.size.width, DefaultBlockWidth(info.format)), | ||
| 509 | .height = Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)), | ||
| 510 | }; | ||
| 511 | const u32 width_alignment = STRIDE_ALIGNMENT / BytesPerBlock(info.format); | ||
| 512 | return Extent2D{ | ||
| 513 | .width = Common::AlignUp(num_tiles.width, width_alignment), | ||
| 514 | .height = num_tiles.height, | ||
| 515 | }; | ||
| 516 | } | ||
| 517 | |||
| 518 | [[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level) { | ||
| 519 | // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L176 | ||
| 520 | ASSERT(info.type != ImageType::Linear); | ||
| 521 | const Extent3D size = AdjustMipSize(info.size, level); | ||
| 522 | const Extent3D num_tiles{ | ||
| 523 | .width = Common::DivCeil(size.width, DefaultBlockWidth(info.format)), | ||
| 524 | .height = Common::DivCeil(size.height, DefaultBlockHeight(info.format)), | ||
| 525 | .depth = size.depth, | ||
| 526 | }; | ||
| 527 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); | ||
| 528 | const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing); | ||
| 529 | const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0); | ||
| 530 | return Extent3D{ | ||
| 531 | .width = Common::AlignBits(num_tiles.width, alignment), | ||
| 532 | .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height), | ||
| 533 | .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth), | ||
| 534 | }; | ||
| 535 | } | ||
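| | // Note: non-strict size comparisons match on these aligned extents, so two | ||
| | // images are considered compatible whenever they occupy the same block-linear | ||
| | // storage, even if their nominal dimensions differ. | ||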
| 536 | |||
| 537 | [[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept { | ||
| 538 | u32 num_blocks = 0; | ||
| 539 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 540 | const Extent3D mip_size = AdjustMipSize(info.size, level); | ||
| 541 | num_blocks += NumBlocks(mip_size, tile_size); | ||
| 542 | } | ||
| 543 | return num_blocks; | ||
| 544 | } | ||
| 545 | |||
| 546 | [[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept { | ||
| 547 | ASSERT(info.type == ImageType::e3D); | ||
| 548 | u32 num_slices = 0; | ||
| 549 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 550 | num_slices += AdjustMipSize(info.size.depth, level); | ||
| 551 | } | ||
| 552 | return num_slices; | ||
| 553 | } | ||
| 554 | |||
| 555 | void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||
| 556 | const ImageInfo& info, const BufferImageCopy& copy, | ||
| 557 | std::span<const u8> memory) { | ||
| 558 | ASSERT(copy.image_offset.z == 0); | ||
| 559 | ASSERT(copy.image_extent.depth == 1); | ||
| 560 | ASSERT(copy.image_subresource.base_level == 0); | ||
| 561 | ASSERT(copy.image_subresource.base_layer == 0); | ||
| 562 | ASSERT(copy.image_subresource.num_layers == 1); | ||
| 563 | |||
| 564 | const u32 bytes_per_block = BytesPerBlock(info.format); | ||
| 565 | const u32 row_length = copy.image_extent.width * bytes_per_block; | ||
| 566 | const u32 guest_offset_x = copy.image_offset.x * bytes_per_block; | ||
| 567 | |||
| 568 | for (u32 line = 0; line < copy.image_extent.height; ++line) { | ||
| 569 | const u32 host_offset_y = line * info.pitch; | ||
| 570 | const u32 guest_offset_y = (copy.image_offset.y + line) * info.pitch; | ||
| 571 | const u32 guest_offset = guest_offset_x + guest_offset_y; | ||
| 572 | gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, memory.data() + host_offset_y, | ||
| 573 | row_length); | ||
| 574 | } | ||
| 575 | } | ||
| 576 | |||
| 577 | void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||
| 578 | const ImageInfo& info, const BufferImageCopy& copy, | ||
| 579 | std::span<const u8> input) { | ||
| 580 | const Extent3D size = info.size; | ||
| 581 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 582 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 583 | const u32 bytes_per_block = BytesPerBlock(info.format); | ||
| 584 | |||
| 585 | const s32 level = copy.image_subresource.base_level; | ||
| 586 | const Extent3D level_size = AdjustMipSize(size, level); | ||
| 587 | const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); | ||
| 588 | const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block; | ||
| 589 | |||
| 590 | UNIMPLEMENTED_IF(info.tile_width_spacing > 0); | ||
| 591 | |||
| 592 | UNIMPLEMENTED_IF(copy.image_offset.x != 0); | ||
| 593 | UNIMPLEMENTED_IF(copy.image_offset.y != 0); | ||
| 594 | UNIMPLEMENTED_IF(copy.image_offset.z != 0); | ||
| 595 | UNIMPLEMENTED_IF(copy.image_extent != level_size); | ||
| 596 | |||
| 597 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | ||
| 598 | const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); | ||
| 599 | |||
| 600 | size_t host_offset = copy.buffer_offset; | ||
| 601 | |||
| 602 | const u32 num_levels = info.resources.levels; | ||
| 603 | const std::array sizes = CalculateLevelSizes(level_info, num_levels); | ||
| 604 | size_t guest_offset = std::reduce(sizes.begin(), sizes.begin() + level, 0); | ||
| 605 | const size_t layer_stride = | ||
| 606 | AlignLayerSize(std::reduce(sizes.begin(), sizes.begin() + num_levels, 0), size, | ||
| 607 | level_info.block, tile_size.height, info.tile_width_spacing); | ||
| 608 | const size_t subresource_size = sizes[level]; | ||
| 609 | |||
| 610 | const auto dst_data = std::make_unique<u8[]>(subresource_size); | ||
| 611 | const std::span<u8> dst(dst_data.get(), subresource_size); | ||
| 612 | |||
| 613 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { | ||
| 614 | const std::span<const u8> src = input.subspan(host_offset); | ||
| 615 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, | ||
| 616 | num_tiles.depth, block.height, block.depth); | ||
| 617 | |||
| 618 | gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); | ||
| 619 | |||
| 620 | host_offset += host_bytes_per_layer; | ||
| 621 | guest_offset += layer_stride; | ||
| 622 | } | ||
| 623 | ASSERT(host_offset - copy.buffer_offset == copy.buffer_size); | ||
| 624 | } | ||
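| | // Note: layers are swizzled one at a time through a scratch buffer sized for | ||
| | // a single subresource; the host offset advances by the packed layer size | ||
| | // while the guest offset advances by the aligned layer stride. | ||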
| 625 | |||
| 626 | } // Anonymous namespace | ||
| 627 | |||
| 628 | u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept { | ||
| 629 | if (info.type == ImageType::Buffer) { | ||
| 630 | return info.size.width * BytesPerBlock(info.format); | ||
| 631 | } | ||
| 632 | if (info.type == ImageType::Linear) { | ||
| 633 | return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); | ||
| 634 | } | ||
| 635 | if (info.resources.layers > 1) { | ||
| 636 | ASSERT(info.layer_stride != 0); | ||
| 637 | return info.layer_stride * info.resources.layers; | ||
| 638 | } else { | ||
| 639 | return CalculateLayerSize(info); | ||
| 640 | } | ||
| 641 | } | ||
| 642 | |||
| 643 | u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept { | ||
| 644 | if (info.type == ImageType::Buffer) { | ||
| 645 | return info.size.width * BytesPerBlock(info.format); | ||
| 646 | } | ||
| 647 | if (info.num_samples > 1) { | ||
| 648 | // Multisample images can't be uploaded to or downloaded from the host | ||
| 649 | return 0; | ||
| 650 | } | ||
| 651 | if (info.type == ImageType::Linear) { | ||
| 652 | return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); | ||
| 653 | } | ||
| 654 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 655 | return NumBlocksPerLayer(info, tile_size) * info.resources.layers * BytesPerBlock(info.format); | ||
| 656 | } | ||
| 657 | |||
| 658 | u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept { | ||
| 659 | if (info.type == ImageType::Buffer) { | ||
| 660 | return info.size.width * BytesPerBlock(info.format); | ||
| 661 | } | ||
| 662 | static constexpr Extent2D TILE_SIZE{1, 1}; | ||
| 663 | return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; | ||
| 664 | } | ||
| 665 | |||
| 666 | u32 CalculateLayerStride(const ImageInfo& info) noexcept { | ||
| 667 | ASSERT(info.type != ImageType::Linear); | ||
| 668 | const u32 layer_size = CalculateLayerSize(info); | ||
| 669 | const Extent3D size = info.size; | ||
| 670 | const Extent3D block = info.block; | ||
| 671 | const u32 tile_size_y = DefaultBlockHeight(info.format); | ||
| 672 | return AlignLayerSize(layer_size, size, block, tile_size_y, info.tile_width_spacing); | ||
| 673 | } | ||
| 674 | |||
| 675 | u32 CalculateLayerSize(const ImageInfo& info) noexcept { | ||
| 676 | ASSERT(info.type != ImageType::Linear); | ||
| 677 | return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples, | ||
| 678 | info.tile_width_spacing, info.resources.levels); | ||
| 679 | } | ||
| 680 | |||
| 681 | std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(const ImageInfo& info) noexcept { | ||
| 682 | ASSERT(info.resources.levels <= MAX_MIP_LEVELS); | ||
| 683 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 684 | std::array<u32, MAX_MIP_LEVELS> offsets{}; | ||
| 685 | u32 offset = 0; | ||
| 686 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 687 | offsets[level] = offset; | ||
| 688 | offset += CalculateLevelSize(level_info, level); | ||
| 689 | } | ||
| 690 | return offsets; | ||
| 691 | } | ||
| 692 | |||
| 693 | std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { | ||
| 694 | ASSERT(info.type == ImageType::e3D); | ||
| 695 | std::vector<u32> offsets; | ||
| 696 | offsets.reserve(NumSlices(info)); | ||
| 697 | |||
| 698 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 699 | u32 mip_offset = 0; | ||
| 700 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 701 | const Extent3D tile_shift = TileShift(level_info, level); | ||
| 702 | const Extent3D tiles = LevelTiles(level_info, level); | ||
| 703 | const u32 gob_size_shift = tile_shift.height + GOB_SIZE_SHIFT; | ||
| 704 | const u32 slice_size = (tiles.width * tiles.height) << gob_size_shift; | ||
| 705 | const u32 z_mask = (1U << tile_shift.depth) - 1; | ||
| 706 | const u32 depth = AdjustMipSize(info.size.depth, level); | ||
| 707 | for (u32 slice = 0; slice < depth; ++slice) { | ||
| 708 | const u32 z_low = slice & z_mask; | ||
| 709 | const u32 z_high = slice & ~z_mask; | ||
| 710 | offsets.push_back(mip_offset + (z_low << gob_size_shift) + (z_high * slice_size)); | ||
| 711 | } | ||
| 712 | mip_offset += CalculateLevelSize(level_info, level); | ||
| 713 | } | ||
| 714 | return offsets; | ||
| 715 | } | ||
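| | // Note: Z is split into z_low (the slice's position inside one depth block, | ||
| | // advancing one GOB plane at a time) and z_high (the depth block index, | ||
| | // advancing a whole slice_size), matching how block-linear memory interleaves | ||
| | // 3D slices. | ||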
| 716 | |||
| 717 | std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { | ||
| 718 | ASSERT(info.type == ImageType::e3D); | ||
| 719 | std::vector<SubresourceBase> subresources; | ||
| 720 | subresources.reserve(NumSlices(info)); | ||
| 721 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 722 | const s32 depth = AdjustMipSize(info.size.depth, level); | ||
| 723 | for (s32 slice = 0; slice < depth; ++slice) { | ||
| 724 | subresources.emplace_back(SubresourceBase{ | ||
| 725 | .level = level, | ||
| 726 | .layer = slice, | ||
| 727 | }); | ||
| 728 | } | ||
| 729 | } | ||
| 730 | return subresources; | ||
| 731 | } | ||
| 732 | |||
| 733 | u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) { | ||
| 734 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 735 | const Extent3D level_size = AdjustMipSize(info.size, level); | ||
| 736 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | ||
| 737 | const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level); | ||
| 738 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); | ||
| 739 | return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing); | ||
| 740 | } | ||
| 741 | |||
| 742 | PixelFormat PixelFormatFromTIC(const TICEntry& config) noexcept { | ||
| 743 | return PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, | ||
| 744 | config.a_type, config.srgb_conversion); | ||
| 745 | } | ||
| 746 | |||
| 747 | ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { | ||
| 748 | switch (info.type) { | ||
| 749 | case ImageType::e2D: | ||
| 750 | return info.resources.layers > 1 ? ImageViewType::e2DArray : ImageViewType::e2D; | ||
| 751 | case ImageType::e3D: | ||
| 752 | return ImageViewType::e2DArray; | ||
| 753 | case ImageType::Linear: | ||
| 754 | return ImageViewType::e2D; | ||
| 755 | default: | ||
| 756 | UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(info.type)); | ||
| 757 | return ImageViewType{}; | ||
| 758 | } | ||
| 759 | } | ||
| 760 | |||
| 761 | std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, | ||
| 762 | SubresourceBase base) { | ||
| 763 | ASSERT(dst.resources.levels >= src.resources.levels); | ||
| 764 | ASSERT(dst.num_samples == src.num_samples); | ||
| 765 | |||
| 766 | const bool is_dst_3d = dst.type == ImageType::e3D; | ||
| 767 | if (is_dst_3d) { | ||
| 768 | ASSERT(src.type == ImageType::e3D); | ||
| 769 | ASSERT(src.resources.levels == 1); | ||
| 770 | } | ||
| 771 | |||
| 772 | std::vector<ImageCopy> copies; | ||
| 773 | copies.reserve(src.resources.levels); | ||
| 774 | for (s32 level = 0; level < src.resources.levels; ++level) { | ||
| 775 | ImageCopy& copy = copies.emplace_back(); | ||
| 776 | copy.src_subresource = SubresourceLayers{ | ||
| 777 | .base_level = level, | ||
| 778 | .base_layer = 0, | ||
| 779 | .num_layers = src.resources.layers, | ||
| 780 | }; | ||
| 781 | copy.dst_subresource = SubresourceLayers{ | ||
| 782 | .base_level = base.level + level, | ||
| 783 | .base_layer = is_dst_3d ? 0 : base.layer, | ||
| 784 | .num_layers = is_dst_3d ? 1 : src.resources.layers, | ||
| 785 | }; | ||
| 786 | copy.src_offset = Offset3D{ | ||
| 787 | .x = 0, | ||
| 788 | .y = 0, | ||
| 789 | .z = 0, | ||
| 790 | }; | ||
| 791 | copy.dst_offset = Offset3D{ | ||
| 792 | .x = 0, | ||
| 793 | .y = 0, | ||
| 794 | .z = is_dst_3d ? base.layer : 0, | ||
| 795 | }; | ||
| 796 | const Extent3D mip_size = AdjustMipSize(dst.size, base.level + level); | ||
| 797 | copy.extent = AdjustSamplesSize(mip_size, dst.num_samples); | ||
| 798 | if (is_dst_3d) { | ||
| 799 | copy.extent.depth = src.size.depth; | ||
| 800 | } | ||
| 801 | } | ||
| 802 | return copies; | ||
| 803 | } | ||
| 804 | |||
| 805 | bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { | ||
| 806 | if (config.Address() == 0) { | ||
| 807 | return false; | ||
| 808 | } | ||
| 809 | if (config.Address() > (u64(1) << 48)) { | ||
| 810 | return false; | ||
| 811 | } | ||
| 812 | return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); | ||
| 813 | } | ||
| 814 | |||
| 815 | std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||
| 816 | const ImageInfo& info, std::span<u8> output) { | ||
| 817 | const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); | ||
| 818 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); | ||
| 819 | const Extent3D size = info.size; | ||
| 820 | |||
| 821 | if (info.type == ImageType::Linear) { | ||
| 822 | gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); | ||
| 823 | |||
| 824 | ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); | ||
| 825 | return {{ | ||
| 826 | .buffer_offset = 0, | ||
| 827 | .buffer_size = guest_size_bytes, | ||
| 828 | .buffer_row_length = info.pitch >> bpp_log2, | ||
| 829 | .buffer_image_height = size.height, | ||
| 830 | .image_subresource = | ||
| 831 | { | ||
| 832 | .base_level = 0, | ||
| 833 | .base_layer = 0, | ||
| 834 | .num_layers = 1, | ||
| 835 | }, | ||
| 836 | .image_offset = {0, 0, 0}, | ||
| 837 | .image_extent = size, | ||
| 838 | }}; | ||
| 839 | } | ||
| 840 | const auto input_data = std::make_unique<u8[]>(guest_size_bytes); | ||
| 841 | gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes); | ||
| 842 | const std::span<const u8> input(input_data.get(), guest_size_bytes); | ||
| 843 | |||
| 844 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 845 | const s32 num_layers = info.resources.layers; | ||
| 846 | const s32 num_levels = info.resources.levels; | ||
| 847 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 848 | const std::array level_sizes = CalculateLevelSizes(level_info, num_levels); | ||
| 849 | const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing); | ||
| 850 | const u32 layer_size = std::reduce(level_sizes.begin(), level_sizes.begin() + num_levels, 0); | ||
| 851 | const u32 layer_stride = AlignLayerSize(layer_size, size, level_info.block, tile_size.height, | ||
| 852 | info.tile_width_spacing); | ||
| 853 | size_t guest_offset = 0; | ||
| 854 | u32 host_offset = 0; | ||
| 855 | std::vector<BufferImageCopy> copies(num_levels); | ||
| 856 | |||
| 857 | for (s32 level = 0; level < num_levels; ++level) { | ||
| 858 | const Extent3D level_size = AdjustMipSize(size, level); | ||
| 859 | const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); | ||
| 860 | const u32 host_bytes_per_layer = num_blocks_per_layer << bpp_log2; | ||
| 861 | copies[level] = BufferImageCopy{ | ||
| 862 | .buffer_offset = host_offset, | ||
| 863 | .buffer_size = static_cast<size_t>(host_bytes_per_layer) * num_layers, | ||
| 864 | .buffer_row_length = Common::AlignUp(level_size.width, tile_size.width), | ||
| 865 | .buffer_image_height = Common::AlignUp(level_size.height, tile_size.height), | ||
| 866 | .image_subresource = | ||
| 867 | { | ||
| 868 | .base_level = level, | ||
| 869 | .base_layer = 0, | ||
| 870 | .num_layers = info.resources.layers, | ||
| 871 | }, | ||
| 872 | .image_offset = {0, 0, 0}, | ||
| 873 | .image_extent = level_size, | ||
| 874 | }; | ||
| 875 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | ||
| 876 | const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); | ||
| 877 | const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2); | ||
| 878 | size_t guest_layer_offset = 0; | ||
| 879 | |||
| 880 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { | ||
| 881 | const std::span<u8> dst = output.subspan(host_offset); | ||
| 882 | const std::span<const u8> src = input.subspan(guest_offset + guest_layer_offset); | ||
| 883 | UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height, | ||
| 884 | num_tiles.depth, block.height, block.depth, stride_alignment); | ||
| 885 | guest_layer_offset += layer_stride; | ||
| 886 | host_offset += host_bytes_per_layer; | ||
| 887 | } | ||
| 888 | guest_offset += level_sizes[level]; | ||
| 889 | } | ||
| 890 | return copies; | ||
| 891 | } | ||
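| | // Note: the result is one BufferImageCopy per mip level, with every layer of | ||
| | // a level deswizzled into consecutive host memory; guest offsets step by the | ||
| | // aligned layer stride across layers and by the level size across levels. | ||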
| 892 | |||
| 893 | BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||
| 894 | const ImageBase& image, std::span<u8> output) { | ||
| 895 | gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes); | ||
| 896 | return BufferCopy{ | ||
| 897 | .src_offset = 0, | ||
| 898 | .dst_offset = 0, | ||
| 899 | .size = image.guest_size_bytes, | ||
| 900 | }; | ||
| 901 | } | ||
| 902 | |||
| 903 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | ||
| 904 | std::span<BufferImageCopy> copies) { | ||
| 905 | u32 output_offset = 0; | ||
| 906 | |||
| 907 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 908 | for (BufferImageCopy& copy : copies) { | ||
| 909 | const u32 level = copy.image_subresource.base_level; | ||
| 910 | const Extent3D mip_size = AdjustMipSize(info.size, level); | ||
| 911 | ASSERT(copy.image_offset == Offset3D{}); | ||
| 912 | ASSERT(copy.image_subresource.base_layer == 0); | ||
| 913 | ASSERT(copy.image_extent == mip_size); | ||
| 914 | ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); | ||
| 915 | ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); | ||
| 916 | |||
| 917 | if (IsPixelFormatASTC(info.format)) { | ||
| 918 | ASSERT(copy.image_extent.depth == 1); | ||
| 919 | Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset), | ||
| 920 | copy.image_extent.width, copy.image_extent.height, | ||
| 921 | copy.image_subresource.num_layers, tile_size.width, | ||
| 922 | tile_size.height, output.subspan(output_offset)); | ||
| 923 | } else { | ||
| 924 | DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, | ||
| 925 | output.subspan(output_offset)); | ||
| 926 | } | ||
| 927 | copy.buffer_offset = output_offset; | ||
| 928 | copy.buffer_row_length = mip_size.width; | ||
| 929 | copy.buffer_image_height = mip_size.height; | ||
| 930 | |||
| 931 | output_offset += copy.image_extent.width * copy.image_extent.height * | ||
| 932 | copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; | ||
| 933 | } | ||
| 934 | } | ||
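| | // Note: each copy is patched in place after decompression: buffer_offset, | ||
| | // buffer_row_length and buffer_image_height now describe the tightly packed | ||
| | // converted output instead of the compressed guest data. | ||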
| 935 | |||
| 936 | std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { | ||
| 937 | const Extent3D size = info.size; | ||
| 938 | const u32 bytes_per_block = BytesPerBlock(info.format); | ||
| 939 | if (info.type == ImageType::Linear) { | ||
| 940 | ASSERT(info.pitch % bytes_per_block == 0); | ||
| 941 | return {{ | ||
| 942 | .buffer_offset = 0, | ||
| 943 | .buffer_size = static_cast<size_t>(info.pitch) * size.height, | ||
| 944 | .buffer_row_length = info.pitch / bytes_per_block, | ||
| 945 | .buffer_image_height = size.height, | ||
| 946 | .image_subresource = | ||
| 947 | { | ||
| 948 | .base_level = 0, | ||
| 949 | .base_layer = 0, | ||
| 950 | .num_layers = 1, | ||
| 951 | }, | ||
| 952 | .image_offset = {0, 0, 0}, | ||
| 953 | .image_extent = size, | ||
| 954 | }}; | ||
| 955 | } | ||
| 956 | UNIMPLEMENTED_IF(info.tile_width_spacing > 0); | ||
| 957 | |||
| 958 | const s32 num_layers = info.resources.layers; | ||
| 959 | const s32 num_levels = info.resources.levels; | ||
| 960 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 961 | |||
| 962 | u32 host_offset = 0; | ||
| 963 | |||
| 964 | std::vector<BufferImageCopy> copies(num_levels); | ||
| 965 | for (s32 level = 0; level < num_levels; ++level) { | ||
| 966 | const Extent3D level_size = AdjustMipSize(size, level); | ||
| 967 | const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); | ||
| 968 | const u32 host_bytes_per_level = num_blocks_per_layer * bytes_per_block * num_layers; | ||
| 969 | copies[level] = BufferImageCopy{ | ||
| 970 | .buffer_offset = host_offset, | ||
| 971 | .buffer_size = host_bytes_per_level, | ||
| 972 | .buffer_row_length = level_size.width, | ||
| 973 | .buffer_image_height = level_size.height, | ||
| 974 | .image_subresource = | ||
| 975 | { | ||
| 976 | .base_level = level, | ||
| 977 | .base_layer = 0, | ||
| 978 | .num_layers = info.resources.layers, | ||
| 979 | }, | ||
| 980 | .image_offset = {0, 0, 0}, | ||
| 981 | .image_extent = level_size, | ||
| 982 | }; | ||
| 983 | host_offset += host_bytes_per_level; | ||
| 984 | } | ||
| 985 | return copies; | ||
| 986 | } | ||
| 987 | |||
| 988 | Extent3D MipSize(Extent3D size, u32 level) { | ||
| 989 | return AdjustMipSize(size, level); | ||
| 990 | } | ||
| 991 | |||
| 992 | Extent3D MipBlockSize(const ImageInfo& info, u32 level) { | ||
| 993 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 994 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 995 | const Extent3D level_size = AdjustMipSize(info.size, level); | ||
| 996 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | ||
| 997 | return AdjustMipBlockSize(num_tiles, level_info.block, level); | ||
| 998 | } | ||
| 999 | |||
| 1000 | std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { | ||
| 1001 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 1002 | if (info.type == ImageType::Linear) { | ||
| 1003 | return std::vector{SwizzleParameters{ | ||
| 1004 | .num_tiles = AdjustTileSize(info.size, tile_size), | ||
| 1005 | .block = {}, | ||
| 1006 | .buffer_offset = 0, | ||
| 1007 | .level = 0, | ||
| 1008 | }}; | ||
| 1009 | } | ||
| 1010 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 1011 | const Extent3D size = info.size; | ||
| 1012 | const s32 num_levels = info.resources.levels; | ||
| 1013 | |||
| 1014 | u32 guest_offset = 0; | ||
| 1015 | std::vector<SwizzleParameters> params(num_levels); | ||
| 1016 | for (s32 level = 0; level < num_levels; ++level) { | ||
| 1017 | const Extent3D level_size = AdjustMipSize(size, level); | ||
| 1018 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | ||
| 1019 | const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); | ||
| 1020 | params[level] = SwizzleParameters{ | ||
| 1021 | .num_tiles = num_tiles, | ||
| 1022 | .block = block, | ||
| 1023 | .buffer_offset = guest_offset, | ||
| 1024 | .level = level, | ||
| 1025 | }; | ||
| 1026 | guest_offset += CalculateLevelSize(level_info, level); | ||
| 1027 | } | ||
| 1028 | return params; | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, | ||
| 1032 | std::span<const BufferImageCopy> copies, std::span<const u8> memory) { | ||
| 1033 | const bool is_pitch_linear = info.type == ImageType::Linear; | ||
| 1034 | for (const BufferImageCopy& copy : copies) { | ||
| 1035 | if (is_pitch_linear) { | ||
| 1036 | SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory); | ||
| 1037 | } else { | ||
| 1038 | SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory); | ||
| 1039 | } | ||
| 1040 | } | ||
| 1041 | } | ||
| 1042 | |||
| 1043 | bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 lhs_level, | ||
| 1044 | u32 rhs_level, bool strict_size) noexcept { | ||
| 1045 | ASSERT(lhs.type != ImageType::Linear); | ||
| 1046 | ASSERT(rhs.type != ImageType::Linear); | ||
| 1047 | if (strict_size) { | ||
| 1048 | const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level); | ||
| 1049 | const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level); | ||
| 1050 | return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height; | ||
| 1051 | } else { | ||
| 1052 | const Extent3D lhs_size = BlockLinearAlignedSize(lhs, lhs_level); | ||
| 1053 | const Extent3D rhs_size = BlockLinearAlignedSize(rhs, rhs_level); | ||
| 1054 | return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height; | ||
| 1055 | } | ||
| 1056 | } | ||
| 1057 | |||
| 1058 | bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept { | ||
| 1059 | ASSERT(lhs.type == ImageType::Linear); | ||
| 1060 | ASSERT(rhs.type == ImageType::Linear); | ||
| 1061 | if (strict_size) { | ||
| 1062 | return lhs.size.width == rhs.size.width && lhs.size.height == rhs.size.height; | ||
| 1063 | } else { | ||
| 1064 | const Extent2D lhs_size = PitchLinearAlignedSize(lhs); | ||
| 1065 | const Extent2D rhs_size = PitchLinearAlignedSize(rhs); | ||
| 1066 | return lhs_size == rhs_size; | ||
| 1067 | } | ||
| 1068 | } | ||
| 1069 | |||
| 1070 | std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, | ||
| 1071 | VAddr cpu_addr, const ImageBase& overlap, | ||
| 1072 | bool strict_size, bool broken_views) { | ||
| 1073 | ASSERT(new_info.type != ImageType::Linear); | ||
| 1074 | ASSERT(overlap.info.type != ImageType::Linear); | ||
| 1075 | if (!IsLayerStrideCompatible(new_info, overlap.info)) { | ||
| 1076 | return std::nullopt; | ||
| 1077 | } | ||
| 1078 | if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views)) { | ||
| 1079 | return std::nullopt; | ||
| 1080 | } | ||
| 1081 | if (gpu_addr == overlap.gpu_addr) { | ||
| 1082 | const std::optional solution = ResolveOverlapEqualAddress(new_info, overlap, strict_size); | ||
| 1083 | if (!solution) { | ||
| 1084 | return std::nullopt; | ||
| 1085 | } | ||
| 1086 | return OverlapResult{ | ||
| 1087 | .gpu_addr = gpu_addr, | ||
| 1088 | .cpu_addr = cpu_addr, | ||
| 1089 | .resources = *solution, | ||
| 1090 | }; | ||
| 1091 | } | ||
| 1092 | if (overlap.gpu_addr > gpu_addr) { | ||
| 1093 | return ResolveOverlapRightAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size); | ||
| 1094 | } | ||
| 1095 | // if overlap.gpu_addr < gpu_addr | ||
| 1096 | return ResolveOverlapLeftAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size); | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) { | ||
| 1100 | // If either of the layer strides is zero, we can assume they are compatible | ||
| 1101 | // These images generally come from render targets | ||
| 1102 | if (lhs.layer_stride == 0) { | ||
| 1103 | return true; | ||
| 1104 | } | ||
| 1105 | if (rhs.layer_stride == 0) { | ||
| 1106 | return true; | ||
| 1107 | } | ||
| 1108 | // It's definitely compatible if the layer stride matches | ||
| 1109 | if (lhs.layer_stride == rhs.layer_stride) { | ||
| 1110 | return true; | ||
| 1111 | } | ||
| 1112 | // We also have to compare the possibly unaligned strides | ||
| 1113 | // An image without extra layers can have a stride that was never aligned | ||
| 1114 | if (lhs.maybe_unaligned_layer_stride == rhs.maybe_unaligned_layer_stride) { | ||
| 1115 | return true; | ||
| 1116 | } | ||
| 1117 | return false; | ||
| 1118 | } | ||
| 1119 | |||
| 1120 | std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image, | ||
| 1121 | GPUVAddr candidate_addr, RelaxedOptions options, | ||
| 1122 | bool broken_views) { | ||
| 1123 | const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr); | ||
| 1124 | if (!base) { | ||
| 1125 | return std::nullopt; | ||
| 1126 | } | ||
| 1127 | const ImageInfo& existing = image.info; | ||
| 1128 | if (False(options & RelaxedOptions::Format)) { | ||
| 1129 | if (!IsViewCompatible(existing.format, candidate.format, broken_views)) { | ||
| 1130 | return std::nullopt; | ||
| 1131 | } | ||
| 1132 | } | ||
| 1133 | if (!IsLayerStrideCompatible(existing, candidate)) { | ||
| 1134 | return std::nullopt; | ||
| 1135 | } | ||
| 1136 | if (existing.type != candidate.type) { | ||
| 1137 | return std::nullopt; | ||
| 1138 | } | ||
| 1139 | if (False(options & RelaxedOptions::Samples)) { | ||
| 1140 | if (existing.num_samples != candidate.num_samples) { | ||
| 1141 | return std::nullopt; | ||
| 1142 | } | ||
| 1143 | } | ||
| 1144 | if (existing.resources.levels < candidate.resources.levels + base->level) { | ||
| 1145 | return std::nullopt; | ||
| 1146 | } | ||
| 1147 | if (existing.type == ImageType::e3D) { | ||
| 1148 | const u32 mip_depth = std::max(1U, existing.size.depth >> base->level); | ||
| 1149 | if (mip_depth < candidate.size.depth + base->layer) { | ||
| 1150 | return std::nullopt; | ||
| 1151 | } | ||
| 1152 | } else { | ||
| 1153 | if (existing.resources.layers < candidate.resources.layers + base->layer) { | ||
| 1154 | return std::nullopt; | ||
| 1155 | } | ||
| 1156 | } | ||
| 1157 | const bool strict_size = False(options & RelaxedOptions::Size); | ||
| 1158 | if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) { | ||
| 1159 | return std::nullopt; | ||
| 1160 | } | ||
| 1161 | // TODO: compare block sizes | ||
| 1162 | return base; | ||
| 1163 | } | ||
| 1164 | |||
| 1165 | bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, | ||
| 1166 | RelaxedOptions options, bool broken_views) { | ||
| 1167 | return FindSubresource(candidate, image, candidate_addr, options, broken_views).has_value(); | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, | ||
| 1171 | const ImageBase* src) { | ||
| 1172 | if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { | ||
| 1173 | src_info.format = src->info.format; | ||
| 1174 | } | ||
| 1175 | if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { | ||
| 1176 | dst_info.format = dst->info.format; | ||
| 1177 | } | ||
| 1178 | if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { | ||
| 1179 | dst_info.format = src->info.format; | ||
| 1180 | } | ||
| 1181 | if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { | ||
| 1182 | src_info.format = dst->info.format; | ||
| 1183 | } | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | u32 MapSizeBytes(const ImageBase& image) { | ||
| 1187 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | ||
| 1188 | return image.guest_size_bytes; | ||
| 1189 | } else if (True(image.flags & ImageFlagBits::Converted)) { | ||
| 1190 | return image.converted_size_bytes; | ||
| 1191 | } else { | ||
| 1192 | return image.unswizzled_size_bytes; | ||
| 1193 | } | ||
| 1194 | } | ||
| 1195 | |||
| 1196 | using P = PixelFormat; | ||
| 1197 | |||
| 1198 | static_assert(CalculateLevelSize(LevelInfo{{1920, 1080}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == 0x7f8000); | ||
| 1199 | static_assert(CalculateLevelSize(LevelInfo{{32, 32}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000); | ||
| 1200 | |||
| 1201 | static_assert(CalculateLevelOffset(P::R8_SINT, {1920, 1080}, {0, 2}, 1, 0, 7) == 0x2afc00); | ||
| 1202 | static_assert(CalculateLevelOffset(P::ASTC_2D_12X12_UNORM, {8192, 4096}, {0, 2}, 1, 0, 12) == | ||
| 1203 | 0x50d200); | ||
| 1204 | |||
| 1205 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 0) == 0); | ||
| 1206 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 1) == 0x400000); | ||
| 1207 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 2) == 0x500000); | ||
| 1208 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 3) == 0x540000); | ||
| 1209 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 4) == 0x550000); | ||
| 1210 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 5) == 0x554000); | ||
| 1211 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 6) == 0x555000); | ||
| 1212 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 7) == 0x555400); | ||
| 1213 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 8) == 0x555600); | ||
| 1214 | static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 9) == 0x555800); | ||
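| | // Note: the offsets above are the running sum of the per-level sizes: level 1 | ||
| | // starts after the 0x400000-byte level 0, level 2 another 0x100000 later, and | ||
| | // the smallest levels are each rounded up to a whole 512-byte GOB. | ||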
| 1215 | |||
| 1216 | constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height, | ||
| 1217 | u32 tile_width_spacing, u32 level) { | ||
| 1218 | const Extent3D size{width, height, 1}; | ||
| 1219 | const Extent3D block{0, block_height, 0}; | ||
| 1220 | const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level); | ||
| 1221 | return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing); | ||
| 1222 | } | ||
| 1223 | |||
| 1224 | static_assert(ValidateLayerSize(P::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) == 0x50d800); | ||
| 1225 | static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000); | ||
| 1226 | static_assert(ValidateLayerSize(P::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000); | ||
| 1227 | |||
| 1228 | static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000, | ||
| 1229 | "Tile width spacing is not working"); | ||
| 1230 | static_assert(ValidateLayerSize(P::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000, | ||
| 1231 | "Compressed tile width spacing is not working"); | ||
| 1232 | |||
| 1233 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h new file mode 100644 index 000000000..52a9207d6 --- /dev/null +++ b/src/video_core/texture_cache/util.h | |||
| @@ -0,0 +1,109 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <span> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/surface.h" | ||
| 14 | #include "video_core/texture_cache/image_base.h" | ||
| 15 | #include "video_core/texture_cache/image_view_base.h" | ||
| 16 | #include "video_core/texture_cache/types.h" | ||
| 17 | #include "video_core/textures/texture.h" | ||
| 18 | |||
| 19 | namespace VideoCommon { | ||
| 20 | |||
| 21 | using Tegra::Texture::TICEntry; | ||
| 22 | |||
| 23 | struct OverlapResult { | ||
| 24 | GPUVAddr gpu_addr; | ||
| 25 | VAddr cpu_addr; | ||
| 26 | SubresourceExtent resources; | ||
| 27 | }; | ||
| 28 | |||
| 29 | [[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept; | ||
| 30 | |||
| 31 | [[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept; | ||
| 32 | |||
| 33 | [[nodiscard]] u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept; | ||
| 34 | |||
| 35 | [[nodiscard]] u32 CalculateLayerStride(const ImageInfo& info) noexcept; | ||
| 36 | |||
| 37 | [[nodiscard]] u32 CalculateLayerSize(const ImageInfo& info) noexcept; | ||
| 38 | |||
| 39 | [[nodiscard]] std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets( | ||
| 40 | const ImageInfo& info) noexcept; | ||
| 41 | |||
| 42 | [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); | ||
| 43 | |||
| 44 | [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); | ||
| 45 | |||
| 46 | [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); | ||
| 47 | |||
| 48 | [[nodiscard]] VideoCore::Surface::PixelFormat PixelFormatFromTIC( | ||
| 49 | const Tegra::Texture::TICEntry& config) noexcept; | ||
| 50 | |||
| 51 | [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; | ||
| 52 | |||
| 53 | [[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, | ||
| 54 | const ImageInfo& src, | ||
| 55 | SubresourceBase base); | ||
| 56 | |||
| 57 | [[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); | ||
| 58 | |||
| 59 | [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, | ||
| 60 | GPUVAddr gpu_addr, const ImageInfo& info, | ||
| 61 | std::span<u8> output); | ||
| 62 | |||
| 63 | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||
| 64 | const ImageBase& image, std::span<u8> output); | ||
| 65 | |||
| 66 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | ||
| 67 | std::span<BufferImageCopy> copies); | ||
| 68 | |||
| 69 | [[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); | ||
| 70 | |||
| 71 | [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); | ||
| 72 | |||
| 73 | [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); | ||
| 74 | |||
| 75 | [[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); | ||
| 76 | |||
| 77 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, | ||
| 78 | std::span<const BufferImageCopy> copies, std::span<const u8> memory); | ||
| 79 | |||
| 80 | [[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info, | ||
| 81 | const ImageInfo& overlap_info, u32 new_level, | ||
| 82 | u32 overlap_level, bool strict_size) noexcept; | ||
| 83 | |||
| 84 | [[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, | ||
| 85 | bool strict_size) noexcept; | ||
| 86 | |||
| 87 | [[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, | ||
| 88 | GPUVAddr gpu_addr, VAddr cpu_addr, | ||
| 89 | const ImageBase& overlap, | ||
| 90 | bool strict_size, bool broken_views); | ||
| 91 | |||
| 92 | [[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs); | ||
| 93 | |||
| 94 | [[nodiscard]] std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, | ||
| 95 | const ImageBase& image, | ||
| 96 | GPUVAddr candidate_addr, | ||
| 97 | RelaxedOptions options, | ||
| 98 | bool broken_views); | ||
| 99 | |||
| 100 | [[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, | ||
| 101 | GPUVAddr candidate_addr, RelaxedOptions options, | ||
| 102 | bool broken_views); | ||
| 103 | |||
| 104 | void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, | ||
| 105 | const ImageBase* src); | ||
| 106 | |||
| 107 | [[nodiscard]] u32 MapSizeBytes(const ImageBase& image); | ||
| 108 | |||
| 109 | } // namespace VideoCommon | ||
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 365bde2f1..acd5bdd78 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <algorithm> | 18 | #include <algorithm> |
| 19 | #include <cassert> | 19 | #include <cassert> |
| 20 | #include <cstring> | 20 | #include <cstring> |
| 21 | #include <span> | ||
| 21 | #include <vector> | 22 | #include <vector> |
| 22 | 23 | ||
| 23 | #include <boost/container/static_vector.hpp> | 24 | #include <boost/container/static_vector.hpp> |
| @@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 600 | return params; | 601 | return params; |
| 601 | } | 602 | } |
| 602 | 603 | ||
| 603 | static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, | 604 | static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth, |
| 604 | u32 blockHeight) { | 605 | u32 blockHeight) { |
| 605 | // Don't actually care about the void extent, just read the bits... | 606 | // Don't actually care about the void extent, just read the bits... |
| 606 | for (s32 i = 0; i < 4; ++i) { | 607 | for (s32 i = 0; i < 4; ++i) { |
| @@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block | |||
| 623 | } | 624 | } |
| 624 | } | 625 | } |
| 625 | 626 | ||
| 626 | static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { | 627 | static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { |
| 627 | for (u32 j = 0; j < blockHeight; j++) { | 628 | for (u32 j = 0; j < blockHeight; j++) { |
| 628 | for (u32 i = 0; i < blockWidth; i++) { | 629 | for (u32 i = 0; i < blockWidth; i++) { |
| 629 | outBuf[j * blockWidth + i] = 0xFFFF00FF; | 630 | outBuf[j * blockWidth + i] = 0xFFFF00FF; |
| @@ -1438,9 +1439,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues, | |||
| 1438 | #undef READ_INT_VALUES | 1439 | #undef READ_INT_VALUES |
| 1439 | } | 1440 | } |
| 1440 | 1441 | ||
| 1441 | static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, | 1442 | static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, |
| 1442 | u32* outBuf) { | 1443 | const u32 blockHeight, std::span<u32, 12 * 12> outBuf) { |
| 1443 | InputBitStream strm(inBuf); | 1444 | InputBitStream strm(inBuf.data()); |
| 1444 | TexelWeightParams weightParams = DecodeBlockInfo(strm); | 1445 | TexelWeightParams weightParams = DecodeBlockInfo(strm); |
| 1445 | 1446 | ||
| 1446 | // Was there an error? | 1447 | // Was there an error? |
| @@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | |||
| 1601 | } | 1602 | } |
| 1602 | 1603 | ||
| 1603 | // Read the texel weight data.. | 1604 | // Read the texel weight data.. |
| 1604 | u8 texelWeightData[16]; | 1605 | std::array<u8, 16> texelWeightData; |
| 1605 | memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); | 1606 | std::ranges::copy(inBuf, texelWeightData.begin()); |
| 1606 | 1607 | ||
| 1607 | // Reverse everything | 1608 | // Reverse everything |
| 1608 | for (u32 i = 0; i < 8; i++) { | 1609 | for (u32 i = 0; i < 8; i++) { |
| @@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | |||
| 1618 | 1619 | ||
| 1619 | // Make sure that higher non-texel bits are set to zero | 1620 | // Make sure that higher non-texel bits are set to zero |
| 1620 | const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; | 1621 | const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; |
| 1621 | texelWeightData[clearByteStart - 1] = | 1622 | if (clearByteStart > 0) { |
| 1622 | texelWeightData[clearByteStart - 1] & | 1623 | texelWeightData[clearByteStart - 1] &= |
| 1623 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); | 1624 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); |
| 1624 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); | 1625 | } |
| 1626 | std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U)); | ||
| 1625 | 1627 | ||
| 1626 | IntegerEncodedVector texelWeightValues; | 1628 | IntegerEncodedVector texelWeightValues; |
| 1627 | 1629 | ||
| 1628 | InputBitStream weightStream(texelWeightData); | 1630 | InputBitStream weightStream(texelWeightData.data()); |
| 1629 | 1631 | ||
| 1630 | DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, | 1632 | DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, |
| 1631 | weightParams.GetNumWeightValues()); | 1633 | weightParams.GetNumWeightValues()); |
| @@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | |||
| 1672 | 1674 | ||
| 1673 | namespace Tegra::Texture::ASTC { | 1675 | namespace Tegra::Texture::ASTC { |
| 1674 | 1676 | ||
| 1675 | std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, | 1677 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, |
| 1676 | u32 block_height) { | 1678 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) { |
| 1677 | u32 blockIdx = 0; | 1679 | u32 block_index = 0; |
| 1678 | std::size_t depth_offset = 0; | 1680 | std::size_t depth_offset = 0; |
| 1679 | std::vector<u8> outData(height * width * depth * 4); | 1681 | for (u32 z = 0; z < depth; z++) { |
| 1680 | for (u32 k = 0; k < depth; k++) { | 1682 | for (u32 y = 0; y < height; y += block_height) { |
| 1681 | for (u32 j = 0; j < height; j += block_height) { | 1683 | for (u32 x = 0; x < width; x += block_width) { |
| 1682 | for (u32 i = 0; i < width; i += block_width) { | 1684 | const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; |
| 1683 | |||
| 1684 | const u8* blockPtr = data + blockIdx * 16; | ||
| 1685 | 1685 | ||
| 1686 | // Blocks can be at most 12x12 | 1686 | // Blocks can be at most 12x12 |
| 1687 | u32 uncompData[144]; | 1687 | std::array<u32, 12 * 12> uncompData; |
| 1688 | ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); | 1688 | ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); |
| 1689 | 1689 | ||
| 1690 | u32 decompWidth = std::min(block_width, width - i); | 1690 | u32 decompWidth = std::min(block_width, width - x); |
| 1691 | u32 decompHeight = std::min(block_height, height - j); | 1691 | u32 decompHeight = std::min(block_height, height - y); |
| 1692 | 1692 | ||
| 1693 | u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; | 1693 | const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); |
| 1694 | for (u32 jj = 0; jj < decompHeight; jj++) { | 1694 | for (u32 jj = 0; jj < decompHeight; jj++) { |
| 1695 | memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); | 1695 | std::memcpy(outRow.data() + jj * width * 4, |
| 1696 | uncompData.data() + jj * block_width, decompWidth * 4); | ||
| 1696 | } | 1697 | } |
| 1697 | 1698 | ++block_index; | |
| 1698 | blockIdx++; | ||
| 1699 | } | 1699 | } |
| 1700 | } | 1700 | } |
| 1701 | depth_offset += height * width * 4; | 1701 | depth_offset += height * width * 4; |
| 1702 | } | 1702 | } |
| 1703 | |||
| 1704 | return outData; | ||
| 1705 | } | 1703 | } |
| 1706 | 1704 | ||
| 1707 | } // namespace Tegra::Texture::ASTC | 1705 | } // namespace Tegra::Texture::ASTC |
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 991cdba72..9105119bc 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h | |||
| @@ -5,11 +5,11 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstdint> | 7 | #include <cstdint> |
| 8 | #include <vector> | 8 | #include <span> |
| 9 | 9 | ||
| 10 | namespace Tegra::Texture::ASTC { | 10 | namespace Tegra::Texture::ASTC { |
| 11 | 11 | ||
| 12 | std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, | 12 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, |
| 13 | uint32_t depth, uint32_t block_width, uint32_t block_height); | 13 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); |
| 14 | 14 | ||
| 15 | } // namespace Tegra::Texture::ASTC | 15 | } // namespace Tegra::Texture::ASTC |
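With the allocation hoisted out of Decompress, the caller now owns the output buffer. A minimal usage sketch, assuming the output is sized exactly like the old internal allocation (width * height * depth * 4 bytes of RGBA8); the wrapper name is hypothetical:

    #include <cstdint>
    #include <span>
    #include <vector>

    #include "video_core/textures/astc.h"

    // Hypothetical caller-side wrapper: Decompress() no longer allocates, so the
    // caller provides an output span covering width * height * depth RGBA8 texels.
    std::vector<uint8_t> DecompressAstcToRgba8(std::span<const uint8_t> compressed,
                                               uint32_t width, uint32_t height, uint32_t depth,
                                               uint32_t block_width, uint32_t block_height) {
        std::vector<uint8_t> rgba8(std::size_t{width} * height * depth * 4);
        Tegra::Texture::ASTC::Decompress(compressed, width, height, depth, block_width,
                                         block_height, rgba8);
        return rgba8;
    }

This keeps a single allocation per conversion on the caller's side, and callers that already own a staging buffer can pass a subspan of it instead.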
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp deleted file mode 100644 index bd1aebf02..000000000 --- a/src/video_core/textures/convert.cpp +++ /dev/null | |||
| @@ -1,93 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstring> | ||
| 7 | #include <tuple> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/surface.h" | ||
| 14 | #include "video_core/textures/astc.h" | ||
| 15 | #include "video_core/textures/convert.h" | ||
| 16 | |||
| 17 | namespace Tegra::Texture { | ||
| 18 | |||
| 19 | using VideoCore::Surface::PixelFormat; | ||
| 20 | |||
| 21 | template <bool reverse> | ||
| 22 | void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) { | ||
| 23 | union S8Z24 { | ||
| 24 | BitField<0, 24, u32> z24; | ||
| 25 | BitField<24, 8, u32> s8; | ||
| 26 | }; | ||
| 27 | static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); | ||
| 28 | |||
| 29 | union Z24S8 { | ||
| 30 | BitField<0, 8, u32> s8; | ||
| 31 | BitField<8, 24, u32> z24; | ||
| 32 | }; | ||
| 33 | static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); | ||
| 34 | |||
| 35 | S8Z24 s8z24_pixel{}; | ||
| 36 | Z24S8 z24s8_pixel{}; | ||
| 37 | constexpr auto bpp{ | ||
| 38 | VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)}; | ||
| 39 | for (std::size_t y = 0; y < height; ++y) { | ||
| 40 | for (std::size_t x = 0; x < width; ++x) { | ||
| 41 | const std::size_t offset{bpp * (y * width + x)}; | ||
| 42 | if constexpr (reverse) { | ||
| 43 | std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); | ||
| 44 | s8z24_pixel.s8.Assign(z24s8_pixel.s8); | ||
| 45 | s8z24_pixel.z24.Assign(z24s8_pixel.z24); | ||
| 46 | std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); | ||
| 47 | } else { | ||
| 48 | std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); | ||
| 49 | z24s8_pixel.s8.Assign(s8z24_pixel.s8); | ||
| 50 | z24s8_pixel.z24.Assign(s8z24_pixel.z24); | ||
| 51 | std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | } | ||
| 55 | } | ||
| 56 | |||
| 57 | static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) { | ||
| 58 | SwapS8Z24ToZ24S8<false>(data, width, height); | ||
| 59 | } | ||
| 60 | |||
| 61 | static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { | ||
| 62 | SwapS8Z24ToZ24S8<true>(data, width, height); | ||
| 63 | } | ||
| 64 | |||
| 65 | void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width, | ||
| 66 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24) { | ||
| 67 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { | ||
| 68 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. | ||
| 69 | u32 block_width{}; | ||
| 70 | u32 block_height{}; | ||
| 71 | std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); | ||
| 72 | const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress( | ||
| 73 | in_data, width, height, depth, block_width, block_height); | ||
| 74 | std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); | ||
| 75 | |||
| 76 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { | ||
| 77 | Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, | ||
| 82 | bool convert_astc, bool convert_s8z24) { | ||
| 83 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { | ||
| 84 | LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", | ||
| 85 | pixel_format); | ||
| 86 | UNREACHABLE(); | ||
| 87 | |||
| 88 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { | ||
| 89 | Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height); | ||
| 90 | } | ||
| 91 | } | ||
| 92 | |||
| 93 | } // namespace Tegra::Texture | ||
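Incidentally, the bit-field shuffle deleted above amounts to rotating each 32-bit texel by one byte: S8Z24 keeps depth in bits 0-23 and stencil in bits 24-31, while Z24S8 keeps stencil in bits 0-7 and depth in bits 8-31. A minimal C++20 sketch (function names are illustrative):

    #include <bit>     // std::rotl / std::rotr (C++20)
    #include <cstdint>

    constexpr uint32_t S8Z24ToZ24S8(uint32_t texel) {
        return std::rotl(texel, 8); // depth moves up, stencil wraps to the low byte
    }

    constexpr uint32_t Z24S8ToS8Z24(uint32_t texel) {
        return std::rotr(texel, 8); // inverse rotation
    }

    static_assert(S8Z24ToZ24S8(0xAB123456u) == 0x123456ABu); // stencil AB, depth 123456
    static_assert(Z24S8ToS8Z24(0x123456ABu) == 0xAB123456u);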
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h deleted file mode 100644 index d5d6c77bb..000000000 --- a/src/video_core/textures/convert.h +++ /dev/null | |||
| @@ -1,22 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace VideoCore::Surface { | ||
| 10 | enum class PixelFormat; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace Tegra::Texture { | ||
| 14 | |||
| 15 | void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format, | ||
| 16 | u32 width, u32 height, u32 depth, bool convert_astc, | ||
| 17 | bool convert_s8z24); | ||
| 18 | |||
| 19 | void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, | ||
| 20 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); | ||
| 21 | |||
| 22 | } // namespace Tegra::Texture | ||
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 16d46a018..9f5181318 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -2,204 +2,111 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | ||
| 5 | #include <cmath> | 6 | #include <cmath> |
| 6 | #include <cstring> | 7 | #include <cstring> |
| 8 | #include <span> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 7 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| 8 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 9 | #include "common/bit_util.h" | 13 | #include "common/bit_util.h" |
| 14 | #include "common/div_ceil.h" | ||
| 10 | #include "video_core/gpu.h" | 15 | #include "video_core/gpu.h" |
| 11 | #include "video_core/textures/decoders.h" | 16 | #include "video_core/textures/decoders.h" |
| 12 | #include "video_core/textures/texture.h" | 17 | #include "video_core/textures/texture.h" |
| 13 | 18 | ||
| 14 | namespace Tegra::Texture { | 19 | namespace Tegra::Texture { |
| 15 | namespace { | ||
| 16 | 20 | ||
| 21 | namespace { | ||
| 17 | /** | 22 | /** |
| 18 | * This table represents the internal swizzle of a gob, | 23 | * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing. |
| 19 | * in format 16 bytes x 2 sector packing. | ||
| 20 | * Calculates the offset of an (x, y) position within a swizzled texture. | 24 | * Calculates the offset of an (x, y) position within a swizzled texture. |
| 21 | * Taken from the Tegra X1 Technical Reference Manual, pages 1187-1188. | 25 | * Taken from the Tegra X1 Technical Reference Manual, pages 1187-1188. |
| 22 | */ | 26 | */ |
| 23 | template <std::size_t N, std::size_t M, u32 Align> | 27 | constexpr SwizzleTable MakeSwizzleTableConst() { |
| 24 | struct alignas(64) SwizzleTable { | 28 | SwizzleTable table{}; |
| 25 | static_assert(M * Align == 64, "Swizzle Table does not align to GOB"); | 29 | for (u32 y = 0; y < table.size(); ++y) { |
| 26 | constexpr SwizzleTable() { | 30 | for (u32 x = 0; x < table[0].size(); ++x) { |
| 27 | for (u32 y = 0; y < N; ++y) { | 31 | table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + |
| 28 | for (u32 x = 0; x < M; ++x) { | 32 | (y % 2) * 16 + (x % 16); |
| 29 | const u32 x2 = x * Align; | ||
| 30 | values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 + | ||
| 31 | ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16)); | ||
| 32 | } | ||
| 33 | } | 33 | } |
| 34 | } | 34 | } |
| 35 | const std::array<u16, M>& operator[](std::size_t index) const { | 35 | return table; |
| 36 | return values[index]; | 36 | } |
| 37 | } | ||
| 38 | std::array<std::array<u16, M>, N> values{}; | ||
| 39 | }; | ||
| 40 | 37 | ||
| 41 | constexpr u32 FAST_SWIZZLE_ALIGN = 16; | 38 | constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst(); |
| 42 | 39 | ||
| 43 | constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>(); | 40 | template <bool TO_LINEAR> |
| 44 | constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>(); | 41 | void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 42 | u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { | ||
| 43 | // The origin of the transformation can be configured here; it is left as zero because the | ||
| 44 | // current API doesn't expose it. | ||
| 45 | static constexpr u32 origin_x = 0; | ||
| 46 | static constexpr u32 origin_y = 0; | ||
| 47 | static constexpr u32 origin_z = 0; | ||
| 45 | 48 | ||
| 46 | /** | 49 | // A custom pitch could be configured here; |
| 47 | * This function manages ALL the GOBs(Group of Bytes) Inside a single block. | 50 | // as it's not exposed, 'width * bpp' is the expected pitch. |
| 48 | * Instead of going gob by gob, we map the coordinates inside a block and manage from | 51 | const u32 pitch = width * bytes_per_pixel; |
| 49 | * those. Block_Width is assumed to be 1. | 52 | const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel; |
| 50 | */ | ||
| 51 | void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, | ||
| 52 | const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, | ||
| 53 | const u32 y_end, const u32 z_end, const u32 tile_offset, | ||
| 54 | const u32 xy_block_size, const u32 layer_z, const u32 stride_x, | ||
| 55 | const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { | ||
| 56 | std::array<u8*, 2> data_ptrs; | ||
| 57 | u32 z_address = tile_offset; | ||
| 58 | |||
| 59 | for (u32 z = z_start; z < z_end; z++) { | ||
| 60 | u32 y_address = z_address; | ||
| 61 | u32 pixel_base = layer_z * z + y_start * stride_x; | ||
| 62 | for (u32 y = y_start; y < y_end; y++) { | ||
| 63 | const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; | ||
| 64 | for (u32 x = x_start; x < x_end; x++) { | ||
| 65 | const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]}; | ||
| 66 | const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; | ||
| 67 | data_ptrs[unswizzle] = swizzled_data + swizzle_offset; | ||
| 68 | data_ptrs[!unswizzle] = unswizzled_data + pixel_index; | ||
| 69 | std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); | ||
| 70 | } | ||
| 71 | pixel_base += stride_x; | ||
| 72 | if ((y + 1) % GOB_SIZE_Y == 0) | ||
| 73 | y_address += GOB_SIZE; | ||
| 74 | } | ||
| 75 | z_address += xy_block_size; | ||
| 76 | } | ||
| 77 | } | ||
| 78 | 53 | ||
| 79 | /** | 54 | const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); |
| 80 | * This function manages ALL the GOBs(Group of Bytes) Inside a single block. | 55 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); |
| 81 | * Instead of going gob by gob, we map the coordinates inside a block and manage from | 56 | const u32 slice_size = |
| 82 | * those. Block_Width is assumed to be 1. | 57 | Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size; |
| 83 | */ | ||
| 84 | void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, | ||
| 85 | const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, | ||
| 86 | const u32 y_end, const u32 z_end, const u32 tile_offset, | ||
| 87 | const u32 xy_block_size, const u32 layer_z, const u32 stride_x, | ||
| 88 | const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { | ||
| 89 | std::array<u8*, 2> data_ptrs; | ||
| 90 | u32 z_address = tile_offset; | ||
| 91 | const u32 x_startb = x_start * bytes_per_pixel; | ||
| 92 | const u32 x_endb = x_end * bytes_per_pixel; | ||
| 93 | |||
| 94 | for (u32 z = z_start; z < z_end; z++) { | ||
| 95 | u32 y_address = z_address; | ||
| 96 | u32 pixel_base = layer_z * z + y_start * stride_x; | ||
| 97 | for (u32 y = y_start; y < y_end; y++) { | ||
| 98 | const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y]; | ||
| 99 | for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) { | ||
| 100 | const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]}; | ||
| 101 | const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; | ||
| 102 | const u32 pixel_index{out_x + pixel_base}; | ||
| 103 | data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; | ||
| 104 | data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; | ||
| 105 | std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN); | ||
| 106 | } | ||
| 107 | pixel_base += stride_x; | ||
| 108 | if ((y + 1) % GOB_SIZE_Y == 0) | ||
| 109 | y_address += GOB_SIZE; | ||
| 110 | } | ||
| 111 | z_address += xy_block_size; | ||
| 112 | } | ||
| 113 | } | ||
| 114 | 58 | ||
| 115 | /** | 59 | const u32 block_height_mask = (1U << block_height) - 1; |
| 116 | * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Texture. | 60 | const u32 block_depth_mask = (1U << block_depth) - 1; |
| 117 | * The body of this function takes care of splitting the swizzled texture into blocks, | 61 | const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth; |
| 118 | * and managing the extents of it. Once all the parameters of a single block are obtained, | 62 | |
| 119 | * the function calls 'ProcessBlock' to process that particular Block. | 63 | for (u32 slice = 0; slice < depth; ++slice) { |
| 120 | * | 64 | const u32 z = slice + origin_z; |
| 121 | * Documentation for the memory layout and decoding can be found at: | 65 | const u32 offset_z = (z >> block_depth) * slice_size + |
| 122 | * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces | 66 | ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height)); |
| 123 | */ | 67 | for (u32 line = 0; line < height; ++line) { |
| 124 | template <bool fast> | 68 | const u32 y = line + origin_y; |
| 125 | void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, | 69 | const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; |
| 126 | const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, | 70 | |
| 127 | const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth, | 71 | const u32 block_y = y >> GOB_SIZE_Y_SHIFT; |
| 128 | const u32 width_spacing) { | 72 | const u32 offset_y = (block_y >> block_height) * block_size + |
| 129 | auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | 73 | ((block_y & block_height_mask) << GOB_SIZE_SHIFT); |
| 130 | const u32 stride_x = width * out_bytes_per_pixel; | 74 | |
| 131 | const u32 layer_z = height * stride_x; | 75 | for (u32 column = 0; column < width; ++column) { |
| 132 | const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; | 76 | const u32 x = (column + origin_x) * bytes_per_pixel; |
| 133 | constexpr u32 gob_elements_y = GOB_SIZE_Y; | 77 | const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift; |
| 134 | constexpr u32 gob_elements_z = GOB_SIZE_Z; | 78 | |
| 135 | const u32 block_x_elements = gob_elements_x; | 79 | const u32 base_swizzled_offset = offset_z + offset_y + offset_x; |
| 136 | const u32 block_y_elements = gob_elements_y * block_height; | 80 | const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X]; |
| 137 | const u32 block_z_elements = gob_elements_z * block_depth; | 81 | |
| 138 | const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing); | 82 | const u32 unswizzled_offset = |
| 139 | const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); | 83 | slice * pitch * height + line * pitch + column * bytes_per_pixel; |
| 140 | const u32 blocks_on_y = div_ceil(height, block_y_elements); | 84 | |
| 141 | const u32 blocks_on_z = div_ceil(depth, block_z_elements); | 85 | u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; |
| 142 | const u32 xy_block_size = GOB_SIZE * block_height; | 86 | const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset]; |
| 143 | const u32 block_size = xy_block_size * block_depth; | 87 | std::memcpy(dst, src, bytes_per_pixel); |
| 144 | u32 tile_offset = 0; | ||
| 145 | for (u32 zb = 0; zb < blocks_on_z; zb++) { | ||
| 146 | const u32 z_start = zb * block_z_elements; | ||
| 147 | const u32 z_end = std::min(depth, z_start + block_z_elements); | ||
| 148 | for (u32 yb = 0; yb < blocks_on_y; yb++) { | ||
| 149 | const u32 y_start = yb * block_y_elements; | ||
| 150 | const u32 y_end = std::min(height, y_start + block_y_elements); | ||
| 151 | for (u32 xb = 0; xb < blocks_on_x; xb++) { | ||
| 152 | const u32 x_start = xb * block_x_elements; | ||
| 153 | const u32 x_end = std::min(width, x_start + block_x_elements); | ||
| 154 | if constexpr (fast) { | ||
| 155 | FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, | ||
| 156 | z_start, x_end, y_end, z_end, tile_offset, xy_block_size, | ||
| 157 | layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); | ||
| 158 | } else { | ||
| 159 | PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, | ||
| 160 | z_start, x_end, y_end, z_end, tile_offset, xy_block_size, | ||
| 161 | layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); | ||
| 162 | } | ||
| 163 | tile_offset += block_size; | ||
| 164 | } | 88 | } |
| 165 | } | 89 | } |
| 166 | } | 90 | } |
| 167 | } | 91 | } |
| 168 | |||
| 169 | } // Anonymous namespace | 92 | } // Anonymous namespace |
| 170 | 93 | ||
| 171 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | 94 | SwizzleTable MakeSwizzleTable() { |
| 172 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, | 95 | return SWIZZLE_TABLE; |
| 173 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { | ||
| 174 | const u32 block_height_size{1U << block_height}; | ||
| 175 | const u32 block_depth_size{1U << block_depth}; | ||
| 176 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) { | ||
| 177 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | ||
| 178 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, | ||
| 179 | block_depth_size, width_spacing); | ||
| 180 | } else { | ||
| 181 | SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | ||
| 182 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, | ||
| 183 | block_depth_size, width_spacing); | ||
| 184 | } | ||
| 185 | } | 96 | } |
| 186 | 97 | ||
| 187 | void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, | 98 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |
| 188 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, | 99 | u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, |
| 189 | u32 block_depth, u32 width_spacing) { | 100 | u32 stride_alignment) { |
| 190 | CopySwizzledData((width + tile_size_x - 1) / tile_size_x, | 101 | Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, |
| 191 | (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, | 102 | stride_alignment); |
| 192 | bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth, | ||
| 193 | width_spacing); | ||
| 194 | } | 103 | } |
| 195 | 104 | ||
| 196 | std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, | 105 | void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 197 | u32 width, u32 height, u32 depth, u32 block_height, | 106 | u32 height, u32 depth, u32 block_height, u32 block_depth, |
| 198 | u32 block_depth, u32 width_spacing) { | 107 | u32 stride_alignment) { |
| 199 | std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); | 108 | Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, |
| 200 | UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, | 109 | stride_alignment); |
| 201 | width, height, depth, block_height, block_depth, width_spacing); | ||
| 202 | return unswizzled_data; | ||
| 203 | } | 110 | } |
| 204 | 111 | ||
| 205 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 112 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| @@ -213,7 +120,7 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 | |||
| 213 | const u32 gob_address_y = | 120 | const u32 gob_address_y = |
| 214 | (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + | 121 | (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + |
| 215 | ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; | 122 | ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; |
| 216 | const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; | 123 | const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; |
| 217 | for (u32 x = 0; x < subrect_width; ++x) { | 124 | for (u32 x = 0; x < subrect_width; ++x) { |
| 218 | const u32 dst_x = x + offset_x; | 125 | const u32 dst_x = x + offset_x; |
| 219 | const u32 gob_address = | 126 | const u32 gob_address = |
| @@ -235,11 +142,11 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, | |||
| 235 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); | 142 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); |
| 236 | 143 | ||
| 237 | const u32 block_height_mask = (1U << block_height) - 1; | 144 | const u32 block_height_mask = (1U << block_height) - 1; |
| 238 | const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height; | 145 | const u32 x_shift = GOB_SIZE_SHIFT + block_height; |
| 239 | 146 | ||
| 240 | for (u32 line = 0; line < line_count; ++line) { | 147 | for (u32 line = 0; line < line_count; ++line) { |
| 241 | const u32 src_y = line + origin_y; | 148 | const u32 src_y = line + origin_y; |
| 242 | const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; | 149 | const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; |
| 243 | 150 | ||
| 244 | const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; | 151 | const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; |
| 245 | const u32 src_offset_y = (block_y >> block_height) * block_size + | 152 | const u32 src_offset_y = (block_y >> block_height) * block_size + |
| @@ -270,7 +177,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt | |||
| 270 | const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; | 177 | const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; |
| 271 | 178 | ||
| 272 | for (u32 line = 0; line < line_count; ++line) { | 179 | for (u32 line = 0; line < line_count; ++line) { |
| 273 | const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; | 180 | const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y]; |
| 274 | const u32 block_y = line / GOB_SIZE_Y; | 181 | const u32 block_y = line / GOB_SIZE_Y; |
| 275 | const u32 dst_offset_y = | 182 | const u32 dst_offset_y = |
| 276 | (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; | 183 | (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; |
| @@ -293,7 +200,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 | |||
| 293 | const std::size_t gob_address_y = | 200 | const std::size_t gob_address_y = |
| 294 | (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + | 201 | (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + |
| 295 | ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; | 202 | ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; |
| 296 | const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; | 203 | const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; |
| 297 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { | 204 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { |
| 298 | const std::size_t gob_address = | 205 | const std::size_t gob_address = |
| 299 | gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; | 206 | gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; |
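As a cross-check of the unified SWIZZLE_TABLE above: the formula assigns every (x, y) byte position inside a 64x8-byte GOB a unique offset within the 512-byte GOB. A standalone sketch, with values hand-derived from the formula itself:

    #include <cstdint>

    // x is a byte offset inside a GOB (0..63), y a line inside the GOB (0..7).
    constexpr uint32_t GobOffset(uint32_t x, uint32_t y) {
        return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
               (y % 2) * 16 + (x % 16);
    }

    static_assert(GobOffset(0, 0) == 0);    // first sector starts the GOB
    static_assert(GobOffset(16, 0) == 32);  // next 16-byte group of the sector pair
    static_assert(GobOffset(0, 1) == 16);   // odd lines interleave within a sector
    static_assert(GobOffset(32, 0) == 256); // right half of the GOB is the second 256 bytes
    static_assert(GobOffset(63, 7) == 511); // last byte of the 512-byte GOB

Swizzle() then only needs to add the block-level terms (offset_z, offset_y, offset_x) to the table lookup to address a full block linear image.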
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 01e156bc8..d7cdc81e8 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -4,7 +4,8 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <vector> | 7 | #include <span> |
| 8 | |||
| 8 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 9 | #include "video_core/textures/texture.h" | 10 | #include "video_core/textures/texture.h" |
| 10 | 11 | ||
| @@ -15,28 +16,25 @@ constexpr u32 GOB_SIZE_Y = 8; | |||
| 15 | constexpr u32 GOB_SIZE_Z = 1; | 16 | constexpr u32 GOB_SIZE_Z = 1; |
| 16 | constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; | 17 | constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; |
| 17 | 18 | ||
| 18 | constexpr std::size_t GOB_SIZE_X_SHIFT = 6; | 19 | constexpr u32 GOB_SIZE_X_SHIFT = 6; |
| 19 | constexpr std::size_t GOB_SIZE_Y_SHIFT = 3; | 20 | constexpr u32 GOB_SIZE_Y_SHIFT = 3; |
| 20 | constexpr std::size_t GOB_SIZE_Z_SHIFT = 0; | 21 | constexpr u32 GOB_SIZE_Z_SHIFT = 0; |
| 21 | constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; | 22 | constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; |
| 22 | 23 | ||
| 23 | /// Unswizzles a swizzled texture without changing its format. | 24 | using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>; |
| 24 | void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, | 25 | |
| 25 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 26 | /// Returns a z-order swizzle table |
| 26 | u32 block_height = TICEntry::DefaultBlockHeight, | 27 | SwizzleTable MakeSwizzleTable(); |
| 27 | u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); | 28 | |
| 28 | 29 | /// Unswizzles a block linear texture into linear memory. | |
| 29 | /// Unswizzles a swizzled texture without changing its format. | 30 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |
| 30 | std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, | 31 | u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, |
| 31 | u32 width, u32 height, u32 depth, | 32 | u32 stride_alignment = 1); |
| 32 | u32 block_height = TICEntry::DefaultBlockHeight, | 33 | |
| 33 | u32 block_depth = TICEntry::DefaultBlockHeight, | 34 | /// Swizzles linear memory into a block linear texture. |
| 34 | u32 width_spacing = 0); | 35 | void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 35 | 36 | u32 height, u32 depth, u32 block_height, u32 block_depth, | |
| 36 | /// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. | 37 | u32 stride_alignment = 1); |
| 37 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | ||
| 38 | u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, | ||
| 39 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); | ||
| 40 | 38 | ||
| 41 | /// This function calculates the correct size of a texture depending if it's tiled or not. | 39 | /// This function calculates the correct size of a texture depending if it's tiled or not. |
| 42 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 40 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
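A usage sketch of the new span-based entry points, assuming an RGBA8 image and caller-sized buffers (the helper is hypothetical; block_height and block_depth are log2 GOB counts, matching the TIC fields):

    #include <cstdint>
    #include <span>
    #include <vector>

    #include "video_core/textures/decoders.h"

    std::vector<u8> UnswizzleRgba8(std::span<const u8> block_linear, u32 width, u32 height,
                                   u32 depth, u32 block_height, u32 block_depth) {
        constexpr u32 bytes_per_pixel = 4; // RGBA8
        std::vector<u8> linear(std::size_t{width} * height * depth * bytes_per_pixel);
        // stride_alignment keeps its default of 1 here.
        Tegra::Texture::UnswizzleTexture(linear, block_linear, bytes_per_pixel, width,
                                         height, depth, block_height, block_depth);
        return linear;
    }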
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index 4171e3ef2..ae5621a7d 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp | |||
| @@ -5,9 +5,13 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | 7 | ||
| 8 | #include "common/cityhash.h" | ||
| 8 | #include "core/settings.h" | 9 | #include "core/settings.h" |
| 9 | #include "video_core/textures/texture.h" | 10 | #include "video_core/textures/texture.h" |
| 10 | 11 | ||
| 12 | using Tegra::Texture::TICEntry; | ||
| 13 | using Tegra::Texture::TSCEntry; | ||
| 14 | |||
| 11 | namespace Tegra::Texture { | 15 | namespace Tegra::Texture { |
| 12 | 16 | ||
| 13 | namespace { | 17 | namespace { |
| @@ -65,7 +69,7 @@ unsigned SettingsMinimumAnisotropy() noexcept { | |||
| 65 | 69 | ||
| 66 | } // Anonymous namespace | 70 | } // Anonymous namespace |
| 67 | 71 | ||
| 68 | std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { | 72 | std::array<float, 4> TSCEntry::BorderColor() const noexcept { |
| 69 | if (!srgb_conversion) { | 73 | if (!srgb_conversion) { |
| 70 | return border_color; | 74 | return border_color; |
| 71 | } | 75 | } |
| @@ -73,8 +77,16 @@ std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { | |||
| 73 | SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; | 77 | SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; |
| 74 | } | 78 | } |
| 75 | 79 | ||
| 76 | float TSCEntry::GetMaxAnisotropy() const noexcept { | 80 | float TSCEntry::MaxAnisotropy() const noexcept { |
| 77 | return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); | 81 | return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); |
| 78 | } | 82 | } |
| 79 | 83 | ||
| 80 | } // namespace Tegra::Texture | 84 | } // namespace Tegra::Texture |
| 85 | |||
| 86 | size_t std::hash<TICEntry>::operator()(const TICEntry& tic) const noexcept { | ||
| 87 | return Common::CityHash64(reinterpret_cast<const char*>(&tic), sizeof tic); | ||
| 88 | } | ||
| 89 | |||
| 90 | size_t std::hash<TSCEntry>::operator()(const TSCEntry& tsc) const noexcept { | ||
| 91 | return Common::CityHash64(reinterpret_cast<const char*>(&tsc), sizeof tsc); | ||
| 92 | } | ||
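With these specializations in place (declared at the end of the texture.h diff below), TIC/TSC descriptors can key standard unordered containers directly: hashing covers the raw 0x20-byte representation via CityHash64, and equality comes from the new raw-array operator==. A sketch with a hypothetical sampler cache:

    #include <unordered_map>

    #include "video_core/textures/texture.h"

    // Hypothetical cache mapping sampler descriptors to backend sampler ids.
    std::unordered_map<Tegra::Texture::TSCEntry, int> sampler_ids;

    int GetSamplerId(const Tegra::Texture::TSCEntry& tsc) {
        const auto [it, inserted] =
            sampler_ids.try_emplace(tsc, static_cast<int>(sampler_ids.size()));
        return it->second;
    }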
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index bbc7e3eaf..c1d14335e 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -53,27 +53,27 @@ enum class TextureFormat : u32 { | |||
| 53 | BC4 = 0x27, | 53 | BC4 = 0x27, |
| 54 | BC5 = 0x28, | 54 | BC5 = 0x28, |
| 55 | S8D24 = 0x29, | 55 | S8D24 = 0x29, |
| 56 | X8Z24 = 0x2a, | 56 | X8D24 = 0x2a, |
| 57 | D24S8 = 0x2b, | 57 | D24S8 = 0x2b, |
| 58 | X4V4Z24__COV4R4V = 0x2c, | 58 | X4V4D24__COV4R4V = 0x2c, |
| 59 | X4V4Z24__COV8R8V = 0x2d, | 59 | X4V4D24__COV8R8V = 0x2d, |
| 60 | V8Z24__COV4R12V = 0x2e, | 60 | V8D24__COV4R12V = 0x2e, |
| 61 | D32 = 0x2f, | 61 | D32 = 0x2f, |
| 62 | D32S8 = 0x30, | 62 | D32S8 = 0x30, |
| 63 | X8Z24_X20V4S8__COV4R4V = 0x31, | 63 | X8D24_X20V4S8__COV4R4V = 0x31, |
| 64 | X8Z24_X20V4S8__COV8R8V = 0x32, | 64 | X8D24_X20V4S8__COV8R8V = 0x32, |
| 65 | ZF32_X20V4X8__COV4R4V = 0x33, | 65 | D32_X20V4X8__COV4R4V = 0x33, |
| 66 | ZF32_X20V4X8__COV8R8V = 0x34, | 66 | D32_X20V4X8__COV8R8V = 0x34, |
| 67 | ZF32_X20V4S8__COV4R4V = 0x35, | 67 | D32_X20V4S8__COV4R4V = 0x35, |
| 68 | ZF32_X20V4S8__COV8R8V = 0x36, | 68 | D32_X20V4S8__COV8R8V = 0x36, |
| 69 | X8Z24_X16V8S8__COV4R12V = 0x37, | 69 | X8D24_X16V8S8__COV4R12V = 0x37, |
| 70 | ZF32_X16V8X8__COV4R12V = 0x38, | 70 | D32_X16V8X8__COV4R12V = 0x38, |
| 71 | ZF32_X16V8S8__COV4R12V = 0x39, | 71 | D32_X16V8S8__COV4R12V = 0x39, |
| 72 | D16 = 0x3a, | 72 | D16 = 0x3a, |
| 73 | V8Z24__COV8R24V = 0x3b, | 73 | V8D24__COV8R24V = 0x3b, |
| 74 | X8Z24_X16V8S8__COV8R24V = 0x3c, | 74 | X8D24_X16V8S8__COV8R24V = 0x3c, |
| 75 | ZF32_X16V8X8__COV8R24V = 0x3d, | 75 | D32_X16V8X8__COV8R24V = 0x3d, |
| 76 | ZF32_X16V8S8__COV8R24V = 0x3e, | 76 | D32_X16V8S8__COV8R24V = 0x3e, |
| 77 | ASTC_2D_4X4 = 0x40, | 77 | ASTC_2D_4X4 = 0x40, |
| 78 | ASTC_2D_5X5 = 0x41, | 78 | ASTC_2D_5X5 = 0x41, |
| 79 | ASTC_2D_6X6 = 0x42, | 79 | ASTC_2D_6X6 = 0x42, |
| @@ -146,7 +146,7 @@ enum class MsaaMode : u32 { | |||
| 146 | }; | 146 | }; |
| 147 | 147 | ||
| 148 | union TextureHandle { | 148 | union TextureHandle { |
| 149 | /* implicit */ TextureHandle(u32 raw_) : raw{raw_} {} | 149 | /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {} |
| 150 | 150 | ||
| 151 | u32 raw; | 151 | u32 raw; |
| 152 | BitField<0, 20, u32> tic_id; | 152 | BitField<0, 20, u32> tic_id; |
| @@ -155,124 +155,124 @@ union TextureHandle { | |||
| 155 | static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); | 155 | static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); |
| 156 | 156 | ||
| 157 | struct TICEntry { | 157 | struct TICEntry { |
| 158 | static constexpr u32 DefaultBlockHeight = 16; | ||
| 159 | static constexpr u32 DefaultBlockDepth = 1; | ||
| 160 | |||
| 161 | union { | ||
| 162 | u32 raw; | ||
| 163 | BitField<0, 7, TextureFormat> format; | ||
| 164 | BitField<7, 3, ComponentType> r_type; | ||
| 165 | BitField<10, 3, ComponentType> g_type; | ||
| 166 | BitField<13, 3, ComponentType> b_type; | ||
| 167 | BitField<16, 3, ComponentType> a_type; | ||
| 168 | |||
| 169 | BitField<19, 3, SwizzleSource> x_source; | ||
| 170 | BitField<22, 3, SwizzleSource> y_source; | ||
| 171 | BitField<25, 3, SwizzleSource> z_source; | ||
| 172 | BitField<28, 3, SwizzleSource> w_source; | ||
| 173 | }; | ||
| 174 | u32 address_low; | ||
| 175 | union { | 158 | union { |
| 176 | BitField<0, 16, u32> address_high; | 159 | struct { |
| 177 | BitField<21, 3, TICHeaderVersion> header_version; | 160 | union { |
| 178 | }; | 161 | BitField<0, 7, TextureFormat> format; |
| 179 | union { | 162 | BitField<7, 3, ComponentType> r_type; |
| 180 | BitField<0, 3, u32> block_width; | 163 | BitField<10, 3, ComponentType> g_type; |
| 181 | BitField<3, 3, u32> block_height; | 164 | BitField<13, 3, ComponentType> b_type; |
| 182 | BitField<6, 3, u32> block_depth; | 165 | BitField<16, 3, ComponentType> a_type; |
| 166 | |||
| 167 | BitField<19, 3, SwizzleSource> x_source; | ||
| 168 | BitField<22, 3, SwizzleSource> y_source; | ||
| 169 | BitField<25, 3, SwizzleSource> z_source; | ||
| 170 | BitField<28, 3, SwizzleSource> w_source; | ||
| 171 | }; | ||
| 172 | u32 address_low; | ||
| 173 | union { | ||
| 174 | BitField<0, 16, u32> address_high; | ||
| 175 | BitField<16, 5, u32> layer_base_3_7; | ||
| 176 | BitField<21, 3, TICHeaderVersion> header_version; | ||
| 177 | BitField<24, 1, u32> load_store_hint; | ||
| 178 | BitField<25, 4, u32> view_coherency_hash; | ||
| 179 | BitField<29, 3, u32> layer_base_8_10; | ||
| 180 | }; | ||
| 181 | union { | ||
| 182 | BitField<0, 3, u32> block_width; | ||
| 183 | BitField<3, 3, u32> block_height; | ||
| 184 | BitField<6, 3, u32> block_depth; | ||
| 183 | 185 | ||
| 184 | BitField<10, 3, u32> tile_width_spacing; | 186 | BitField<10, 3, u32> tile_width_spacing; |
| 185 | 187 | ||
| 186 | // High 16 bits of the pitch value | 188 | // High 16 bits of the pitch value |
| 187 | BitField<0, 16, u32> pitch_high; | 189 | BitField<0, 16, u32> pitch_high; |
| 188 | BitField<26, 1, u32> use_header_opt_control; | 190 | BitField<26, 1, u32> use_header_opt_control; |
| 189 | BitField<27, 1, u32> depth_texture; | 191 | BitField<27, 1, u32> depth_texture; |
| 190 | BitField<28, 4, u32> max_mip_level; | 192 | BitField<28, 4, u32> max_mip_level; |
| 191 | 193 | ||
| 192 | BitField<0, 16, u32> buffer_high_width_minus_one; | 194 | BitField<0, 16, u32> buffer_high_width_minus_one; |
| 193 | }; | 195 | }; |
| 194 | union { | 196 | union { |
| 195 | BitField<0, 16, u32> width_minus_1; | 197 | BitField<0, 16, u32> width_minus_one; |
| 196 | BitField<22, 1, u32> srgb_conversion; | 198 | BitField<16, 3, u32> layer_base_0_2; |
| 197 | BitField<23, 4, TextureType> texture_type; | 199 | BitField<22, 1, u32> srgb_conversion; |
| 198 | BitField<29, 3, u32> border_size; | 200 | BitField<23, 4, TextureType> texture_type; |
| 201 | BitField<29, 3, u32> border_size; | ||
| 199 | 202 | ||
| 200 | BitField<0, 16, u32> buffer_low_width_minus_one; | 203 | BitField<0, 16, u32> buffer_low_width_minus_one; |
| 201 | }; | 204 | }; |
| 202 | union { | 205 | union { |
| 203 | BitField<0, 16, u32> height_minus_1; | 206 | BitField<0, 16, u32> height_minus_1; |
| 204 | BitField<16, 14, u32> depth_minus_1; | 207 | BitField<16, 14, u32> depth_minus_1; |
| 205 | }; | 208 | BitField<30, 1, u32> is_sparse; |
| 206 | union { | 209 | BitField<31, 1, u32> normalized_coords; |
| 207 | BitField<6, 13, u32> mip_lod_bias; | 210 | }; |
| 208 | BitField<27, 3, u32> max_anisotropy; | 211 | union { |
| 212 | BitField<6, 13, u32> mip_lod_bias; | ||
| 213 | BitField<27, 3, u32> max_anisotropy; | ||
| 214 | }; | ||
| 215 | union { | ||
| 216 | BitField<0, 4, u32> res_min_mip_level; | ||
| 217 | BitField<4, 4, u32> res_max_mip_level; | ||
| 218 | BitField<8, 4, MsaaMode> msaa_mode; | ||
| 219 | BitField<12, 12, u32> min_lod_clamp; | ||
| 220 | }; | ||
| 221 | }; | ||
| 222 | std::array<u64, 4> raw; | ||
| 209 | }; | 223 | }; |
| 210 | 224 | ||
| 211 | union { | 225 | constexpr bool operator==(const TICEntry& rhs) const noexcept { |
| 212 | BitField<0, 4, u32> res_min_mip_level; | 226 | return raw == rhs.raw; |
| 213 | BitField<4, 4, u32> res_max_mip_level; | 227 | } |
| 214 | BitField<8, 4, MsaaMode> msaa_mode; | ||
| 215 | BitField<12, 12, u32> min_lod_clamp; | ||
| 216 | }; | ||
| 217 | 228 | ||
| 218 | GPUVAddr Address() const { | 229 | constexpr bool operator!=(const TICEntry& rhs) const noexcept { |
| 230 | return raw != rhs.raw; | ||
| 231 | } | ||
| 232 | |||
| 233 | constexpr GPUVAddr Address() const { | ||
| 219 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); | 234 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); |
| 220 | } | 235 | } |
| 221 | 236 | ||
| 222 | u32 Pitch() const { | 237 | constexpr u32 Pitch() const { |
| 223 | ASSERT(header_version == TICHeaderVersion::Pitch || | 238 | ASSERT(header_version == TICHeaderVersion::Pitch || |
| 224 | header_version == TICHeaderVersion::PitchColorKey); | 239 | header_version == TICHeaderVersion::PitchColorKey); |
| 225 | // The pitch value is 21 bits, and is 32B aligned. | 240 | // The pitch value is 21 bits, and is 32B aligned. |
| 226 | return pitch_high << 5; | 241 | return pitch_high << 5; |
| 227 | } | 242 | } |
| 228 | 243 | ||
| 229 | u32 Width() const { | 244 | constexpr u32 Width() const { |
| 230 | if (header_version != TICHeaderVersion::OneDBuffer) { | 245 | if (header_version != TICHeaderVersion::OneDBuffer) { |
| 231 | return width_minus_1 + 1; | 246 | return width_minus_one + 1; |
| 232 | } | 247 | } |
| 233 | return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1; | 248 | return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1; |
| 234 | } | 249 | } |
| 235 | 250 | ||
| 236 | u32 Height() const { | 251 | constexpr u32 Height() const { |
| 237 | return height_minus_1 + 1; | 252 | return height_minus_1 + 1; |
| 238 | } | 253 | } |
| 239 | 254 | ||
| 240 | u32 Depth() const { | 255 | constexpr u32 Depth() const { |
| 241 | return depth_minus_1 + 1; | 256 | return depth_minus_1 + 1; |
| 242 | } | 257 | } |
| 243 | 258 | ||
| 244 | u32 BlockWidth() const { | 259 | constexpr u32 BaseLayer() const { |
| 245 | ASSERT(IsTiled()); | 260 | return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8; |
| 246 | return block_width; | ||
| 247 | } | ||
| 248 | |||
| 249 | u32 BlockHeight() const { | ||
| 250 | ASSERT(IsTiled()); | ||
| 251 | return block_height; | ||
| 252 | } | ||
| 253 | |||
| 254 | u32 BlockDepth() const { | ||
| 255 | ASSERT(IsTiled()); | ||
| 256 | return block_depth; | ||
| 257 | } | 261 | } |
| 258 | 262 | ||
| 259 | bool IsTiled() const { | 263 | constexpr bool IsBlockLinear() const { |
| 260 | return header_version == TICHeaderVersion::BlockLinear || | 264 | return header_version == TICHeaderVersion::BlockLinear || |
| 261 | header_version == TICHeaderVersion::BlockLinearColorKey; | 265 | header_version == TICHeaderVersion::BlockLinearColorKey; |
| 262 | } | 266 | } |
| 263 | 267 | ||
| 264 | bool IsLineal() const { | 268 | constexpr bool IsPitchLinear() const { |
| 265 | return header_version == TICHeaderVersion::Pitch || | 269 | return header_version == TICHeaderVersion::Pitch || |
| 266 | header_version == TICHeaderVersion::PitchColorKey; | 270 | header_version == TICHeaderVersion::PitchColorKey; |
| 267 | } | 271 | } |
| 268 | 272 | ||
| 269 | bool IsBuffer() const { | 273 | constexpr bool IsBuffer() const { |
| 270 | return header_version == TICHeaderVersion::OneDBuffer; | 274 | return header_version == TICHeaderVersion::OneDBuffer; |
| 271 | } | 275 | } |
| 272 | |||
| 273 | bool IsSrgbConversionEnabled() const { | ||
| 274 | return srgb_conversion != 0; | ||
| 275 | } | ||
| 276 | }; | 276 | }; |
| 277 | static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); | 277 | static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); |
| 278 | 278 | ||
| @@ -309,6 +309,12 @@ enum class TextureMipmapFilter : u32 { | |||
| 309 | Linear = 3, | 309 | Linear = 3, |
| 310 | }; | 310 | }; |
| 311 | 311 | ||
| 312 | enum class SamplerReduction : u32 { | ||
| 313 | WeightedAverage = 0, | ||
| 314 | Min = 1, | ||
| 315 | Max = 2, | ||
| 316 | }; | ||
| 317 | |||
| 312 | enum class Anisotropy { | 318 | enum class Anisotropy { |
| 313 | Default, | 319 | Default, |
| 314 | Filter2x, | 320 | Filter2x, |
| @@ -333,8 +339,12 @@ struct TSCEntry { | |||
| 333 | BitField<0, 2, TextureFilter> mag_filter; | 339 | BitField<0, 2, TextureFilter> mag_filter; |
| 334 | BitField<4, 2, TextureFilter> min_filter; | 340 | BitField<4, 2, TextureFilter> min_filter; |
| 335 | BitField<6, 2, TextureMipmapFilter> mipmap_filter; | 341 | BitField<6, 2, TextureMipmapFilter> mipmap_filter; |
| 342 | BitField<8, 1, u32> cubemap_anisotropy; | ||
| 336 | BitField<9, 1, u32> cubemap_interface_filtering; | 343 | BitField<9, 1, u32> cubemap_interface_filtering; |
| 344 | BitField<10, 2, SamplerReduction> reduction_filter; | ||
| 337 | BitField<12, 13, u32> mip_lod_bias; | 345 | BitField<12, 13, u32> mip_lod_bias; |
| 346 | BitField<25, 1, u32> float_coord_normalization; | ||
| 347 | BitField<26, 5, u32> trilin_opt; | ||
| 338 | }; | 348 | }; |
| 339 | union { | 349 | union { |
| 340 | BitField<0, 12, u32> min_lod_clamp; | 350 | BitField<0, 12, u32> min_lod_clamp; |
| @@ -347,32 +357,45 @@ struct TSCEntry { | |||
| 347 | }; | 357 | }; |
| 348 | std::array<f32, 4> border_color; | 358 | std::array<f32, 4> border_color; |
| 349 | }; | 359 | }; |
| 350 | std::array<u8, 0x20> raw; | 360 | std::array<u64, 4> raw; |
| 351 | }; | 361 | }; |
| 352 | 362 | ||
| 353 | std::array<float, 4> GetBorderColor() const noexcept; | 363 | constexpr bool operator==(const TSCEntry& rhs) const noexcept { |
| 364 | return raw == rhs.raw; | ||
| 365 | } | ||
| 366 | |||
| 367 | constexpr bool operator!=(const TSCEntry& rhs) const noexcept { | ||
| 368 | return raw != rhs.raw; | ||
| 369 | } | ||
| 370 | |||
| 371 | std::array<float, 4> BorderColor() const noexcept; | ||
| 354 | 372 | ||
| 355 | float GetMaxAnisotropy() const noexcept; | 373 | float MaxAnisotropy() const noexcept; |
| 356 | 374 | ||
| 357 | float GetMinLod() const { | 375 | float MinLod() const { |
| 358 | return static_cast<float>(min_lod_clamp) / 256.0f; | 376 | return static_cast<float>(min_lod_clamp) / 256.0f; |
| 359 | } | 377 | } |
| 360 | 378 | ||
| 361 | float GetMaxLod() const { | 379 | float MaxLod() const { |
| 362 | return static_cast<float>(max_lod_clamp) / 256.0f; | 380 | return static_cast<float>(max_lod_clamp) / 256.0f; |
| 363 | } | 381 | } |
| 364 | 382 | ||
| 365 | float GetLodBias() const { | 383 | float LodBias() const { |
| 366 | // Sign extend the 13-bit value. | 384 | // Sign extend the 13-bit value. |
| 367 | constexpr u32 mask = 1U << (13 - 1); | 385 | static constexpr u32 mask = 1U << (13 - 1); |
| 368 | return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; | 386 | return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; |
| 369 | } | 387 | } |
| 370 | }; | 388 | }; |
| 371 | static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); | 389 | static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); |
| 372 | 390 | ||
| 373 | struct FullTextureInfo { | 391 | } // namespace Tegra::Texture |
| 374 | TICEntry tic; | 392 | |
| 375 | TSCEntry tsc; | 393 | template <> |
| 394 | struct std::hash<Tegra::Texture::TICEntry> { | ||
| 395 | size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept; | ||
| 376 | }; | 396 | }; |
| 377 | 397 | ||
| 378 | } // namespace Tegra::Texture | 398 | template <> |
| 399 | struct std::hash<Tegra::Texture::TSCEntry> { | ||
| 400 | size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept; | ||
| 401 | }; | ||
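LodBias() above relies on the classic xor-then-subtract trick to sign-extend the 13-bit fixed-point field before dividing by 256.0f. An isolated sketch with hand-checked values:

    #include <cstdint>

    // Sign-extend a 13-bit two's-complement value: XOR with the sign bit, then
    // subtract it; the unsigned subtraction wraps for negative inputs.
    constexpr int32_t SignExtend13(uint32_t value) {
        constexpr uint32_t mask = 1U << 12;
        return static_cast<int32_t>((value ^ mask) - mask);
    }

    static_assert(SignExtend13(0x0000) == 0);
    static_assert(SignExtend13(0x0FFF) == 4095);  // largest positive value
    static_assert(SignExtend13(0x1000) == -4096); // most negative value
    static_assert(SignExtend13(0x1FFF) == -1);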
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 837800bfe..53444e945 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp | |||
| @@ -7,8 +7,6 @@ | |||
| 7 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 8 | #include "core/core.h" | 8 | #include "core/core.h" |
| 9 | #include "core/settings.h" | 9 | #include "core/settings.h" |
| 10 | #include "video_core/gpu_asynch.h" | ||
| 11 | #include "video_core/gpu_synch.h" | ||
| 12 | #include "video_core/renderer_base.h" | 10 | #include "video_core/renderer_base.h" |
| 13 | #include "video_core/renderer_opengl/renderer_opengl.h" | 11 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 14 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 12 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| @@ -39,13 +37,9 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer( | |||
| 39 | namespace VideoCore { | 37 | namespace VideoCore { |
| 40 | 38 | ||
| 41 | std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { | 39 | std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { |
| 42 | std::unique_ptr<Tegra::GPU> gpu; | ||
| 43 | const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); | 40 | const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); |
| 44 | if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { | 41 | std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>( |
| 45 | gpu = std::make_unique<VideoCommon::GPUAsynch>(system, use_nvdec); | 42 | system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec); |
| 46 | } else { | ||
| 47 | gpu = std::make_unique<VideoCommon::GPUSynch>(system, use_nvdec); | ||
| 48 | } | ||
| 49 | 43 | ||
| 50 | auto context = emu_window.CreateSharedContext(); | 44 | auto context = emu_window.CreateSharedContext(); |
| 51 | const auto scope = context->Acquire(); | 45 | const auto scope = context->Acquire(); |
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 5b01020ec..8d10ac29e 100644 --- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp | |||
| @@ -32,20 +32,11 @@ namespace Vulkan { | |||
| 32 | 32 | ||
| 33 | static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll"; | 33 | static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll"; |
| 34 | 34 | ||
| 35 | NsightAftermathTracker::NsightAftermathTracker() = default; | 35 | NsightAftermathTracker::NsightAftermathTracker() { |
| 36 | |||
| 37 | NsightAftermathTracker::~NsightAftermathTracker() { | ||
| 38 | if (initialized) { | ||
| 39 | (void)GFSDK_Aftermath_DisableGpuCrashDumps(); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | bool NsightAftermathTracker::Initialize() { | ||
| 44 | if (!dl.Open(AFTERMATH_LIB_NAME)) { | 36 | if (!dl.Open(AFTERMATH_LIB_NAME)) { |
| 45 | LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL"); | 37 | LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL"); |
| 46 | return false; | 38 | return; |
| 47 | } | 39 | } |
| 48 | |||
| 49 | if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps", | 40 | if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps", |
| 50 | &GFSDK_Aftermath_DisableGpuCrashDumps) || | 41 | &GFSDK_Aftermath_DisableGpuCrashDumps) || |
| 51 | !dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps", | 42 | !dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps", |
| @@ -64,27 +55,28 @@ bool NsightAftermathTracker::Initialize() { | |||
| 64 | LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers"); | 55 | LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers"); |
| 65 | return false; | 56 | return; |
| 66 | } | 57 | } |
| 67 | |||
| 68 | dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash"; | 58 | dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash"; |
| 69 | 59 | ||
| 70 | (void)Common::FS::DeleteDirRecursively(dump_dir); | 60 | void(Common::FS::DeleteDirRecursively(dump_dir)); |
| 71 | if (!Common::FS::CreateDir(dump_dir)) { | 61 | if (!Common::FS::CreateDir(dump_dir)) { |
| 72 | LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory"); | 62 | LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory"); |
| 73 | return false; | 63 | return; |
| 74 | } | 64 | } |
| 75 | |||
| 76 | if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps( | 65 | if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps( |
| 77 | GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan, | 66 | GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan, |
| 78 | GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback, | 67 | GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback, |
| 79 | ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) { | 68 | ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) { |
| 80 | LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed"); | 69 | LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed"); |
| 81 | return false; | 70 | return; |
| 82 | } | 71 | } |
| 83 | |||
| 84 | LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir); | 72 | LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir); |
| 85 | |||
| 86 | initialized = true; | 73 | initialized = true; |
| 87 | return true; | 74 | } |
| 75 | |||
| 76 | NsightAftermathTracker::~NsightAftermathTracker() { | ||
| 77 | if (initialized) { | ||
| 78 | (void)GFSDK_Aftermath_DisableGpuCrashDumps(); | ||
| 79 | } | ||
| 88 | } | 80 | } |
| 89 | 81 | ||
| 90 | void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { | 82 | void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { |
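The tracker now follows a fail-soft RAII shape: every fallible setup step runs in the constructor, an early return leaves the object inert, and the destructor only undoes work that actually succeeded. A generic sketch of the pattern (the helpers are stand-ins, not the tracker's real calls):

    class FailSoftResource {
    public:
        FailSoftResource() {
            if (!Acquire()) { // stand-in for dl.Open / symbol loading / EnableGpuCrashDumps
                return;       // errors are logged inside; the object stays safely unusable
            }
            initialized = true;
        }

        ~FailSoftResource() {
            if (initialized) {
                Release();    // stand-in for GFSDK_Aftermath_DisableGpuCrashDumps
            }
        }

    private:
        static bool Acquire() { return true; }
        static void Release() {}

        bool initialized = false;
    };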
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h index afe7ae99e..cee3847fb 100644 --- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h | |||
| @@ -34,8 +34,6 @@ public: | |||
| 34 | NsightAftermathTracker(NsightAftermathTracker&&) = delete; | 34 | NsightAftermathTracker(NsightAftermathTracker&&) = delete; |
| 35 | NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; | 35 | NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; |
| 36 | 36 | ||
| 37 | bool Initialize(); | ||
| 38 | |||
| 39 | void SaveShader(const std::vector<u32>& spirv) const; | 37 | void SaveShader(const std::vector<u32>& spirv) const; |
| 40 | 38 | ||
| 41 | private: | 39 | private: |
| @@ -78,9 +76,6 @@ private: | |||
| 78 | #ifndef HAS_NSIGHT_AFTERMATH | 76 | #ifndef HAS_NSIGHT_AFTERMATH |
| 79 | inline NsightAftermathTracker::NsightAftermathTracker() = default; | 77 | inline NsightAftermathTracker::NsightAftermathTracker() = default; |
| 80 | inline NsightAftermathTracker::~NsightAftermathTracker() = default; | 78 | inline NsightAftermathTracker::~NsightAftermathTracker() = default; |
| 81 | inline bool NsightAftermathTracker::Initialize() { | ||
| 82 | return false; | ||
| 83 | } | ||
| 84 | inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {} | 79 | inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {} |
| 85 | #endif | 80 | #endif |
| 86 | 81 | ||
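With Initialize() gone, the header keeps supplying inline no-op definitions when HAS_NSIGHT_AFTERMATH is not defined, so call sites never need their own #ifdef blocks. The general shape of that technique, with illustrative names rather than the ones in the tree:

    #include <cstdint>
    #include <vector>

    class Tracker {
    public:
        Tracker();
        ~Tracker();
        void SaveShader(const std::vector<std::uint32_t>& spirv) const;
    };

    // Without the optional SDK, inline no-ops satisfy the linker and let
    // callers invoke the same API unconditionally.
    #ifndef HAS_OPTIONAL_SDK
    inline Tracker::Tracker() = default;
    inline Tracker::~Tracker() = default;
    inline void Tracker::SaveShader(const std::vector<std::uint32_t>&) const {}
    #endif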
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp new file mode 100644 index 000000000..ea7af8ad4 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | #include "common/logging/log.h" | ||
| 7 | #include "video_core/vulkan_common/vulkan_debug_callback.h" | ||
| 8 | |||
| 9 | namespace Vulkan { | ||
| 10 | namespace { | ||
| 11 | VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, | ||
| 12 | VkDebugUtilsMessageTypeFlagsEXT type, | ||
| 13 | const VkDebugUtilsMessengerCallbackDataEXT* data, | ||
| 14 | [[maybe_unused]] void* user_data) { | ||
| 15 | const std::string_view message{data->pMessage}; | ||
| 16 | if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { | ||
| 17 | LOG_CRITICAL(Render_Vulkan, "{}", message); | ||
| 18 | } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) { | ||
| 19 | LOG_WARNING(Render_Vulkan, "{}", message); | ||
| 20 | } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) { | ||
| 21 | LOG_INFO(Render_Vulkan, "{}", message); | ||
| 22 | } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) { | ||
| 23 | LOG_DEBUG(Render_Vulkan, "{}", message); | ||
| 24 | } | ||
| 25 | return VK_FALSE; | ||
| 26 | } | ||
| 27 | } // Anonymous namespace | ||
| 28 | |||
| 29 | vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance) { | ||
| 30 | return instance.CreateDebugUtilsMessenger(VkDebugUtilsMessengerCreateInfoEXT{ | ||
| 31 | .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, | ||
| 32 | .pNext = nullptr, | ||
| 33 | .flags = 0, | ||
| 34 | .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | | ||
| 35 | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | | ||
| 36 | VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | | ||
| 37 | VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, | ||
| 38 | .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | | ||
| 39 | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | | ||
| 40 | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, | ||
| 41 | .pfnUserCallback = Callback, | ||
| 42 | }); | ||
| 43 | } | ||
| 44 | |||
| 45 | } // namespace Vulkan | ||
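A sketch of how a caller might wire this messenger up next to the instance helpers introduced later in this patch. The DebugContext holder and MakeDebugContext function are hypothetical, and the wrapper handles are assumed to be default-constructible and movable:

    #include "video_core/vulkan_common/vulkan_debug_callback.h"
    #include "video_core/vulkan_common/vulkan_instance.h"
    #include "video_core/vulkan_common/vulkan_library.h"

    namespace Vulkan {

    struct DebugContext {           // hypothetical holder, not from this patch
        Common::DynamicLibrary library;
        vk::InstanceDispatch dld;
        vk::Instance instance;
        vk::DebugUtilsMessenger messenger;
    };

    DebugContext MakeDebugContext(Core::Frontend::WindowSystemType window_type) {
        DebugContext ctx;
        ctx.library = OpenLibrary();
        ctx.instance = CreateInstance(ctx.library, ctx.dld, VK_API_VERSION_1_1,
                                      window_type,
                                      /*enable_debug_utils=*/true,
                                      /*enable_layers=*/true);
        // The returned handle must stay alive for messages to keep flowing.
        ctx.messenger = CreateDebugCallback(ctx.instance);
        return ctx;
    }

    } // namespace Vulkan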
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.h b/src/video_core/vulkan_common/vulkan_debug_callback.h new file mode 100644 index 000000000..2efcd244c --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_debug_callback.h | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 8 | |||
| 9 | namespace Vulkan { | ||
| 10 | |||
| 11 | vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance); | ||
| 12 | |||
| 13 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index ce3846195..75173324e 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -13,8 +13,9 @@ | |||
| 13 | 13 | ||
| 14 | #include "common/assert.h" | 14 | #include "common/assert.h" |
| 15 | #include "core/settings.h" | 15 | #include "core/settings.h" |
| 16 | #include "video_core/renderer_vulkan/vk_device.h" | 16 | #include "video_core/vulkan_common/nsight_aftermath_tracker.h" |
| 17 | #include "video_core/renderer_vulkan/wrapper.h" | 17 | #include "video_core/vulkan_common/vulkan_device.h" |
| 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 18 | 19 | ||
| 19 | namespace Vulkan { | 20 | namespace Vulkan { |
| 20 | 21 | ||
| @@ -46,6 +47,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ | |||
| 46 | VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, | 47 | VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, |
| 47 | VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, | 48 | VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, |
| 48 | VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, | 49 | VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, |
| 50 | VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, | ||
| 49 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, | 51 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, |
| 50 | VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, | 52 | VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, |
| 51 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, | 53 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, |
| @@ -122,6 +124,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( | |||
| 122 | VK_FORMAT_R16G16_UNORM, | 124 | VK_FORMAT_R16G16_UNORM, |
| 123 | VK_FORMAT_R16G16_SNORM, | 125 | VK_FORMAT_R16G16_SNORM, |
| 124 | VK_FORMAT_R16G16_SFLOAT, | 126 | VK_FORMAT_R16G16_SFLOAT, |
| 127 | VK_FORMAT_R16G16_SINT, | ||
| 125 | VK_FORMAT_R16_UNORM, | 128 | VK_FORMAT_R16_UNORM, |
| 126 | VK_FORMAT_R16_UINT, | 129 | VK_FORMAT_R16_UINT, |
| 127 | VK_FORMAT_R8G8B8A8_SRGB, | 130 | VK_FORMAT_R8G8B8A8_SRGB, |
| @@ -161,18 +164,32 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( | |||
| 161 | VK_FORMAT_BC2_SRGB_BLOCK, | 164 | VK_FORMAT_BC2_SRGB_BLOCK, |
| 162 | VK_FORMAT_BC3_SRGB_BLOCK, | 165 | VK_FORMAT_BC3_SRGB_BLOCK, |
| 163 | VK_FORMAT_BC7_SRGB_BLOCK, | 166 | VK_FORMAT_BC7_SRGB_BLOCK, |
| 167 | VK_FORMAT_ASTC_4x4_UNORM_BLOCK, | ||
| 164 | VK_FORMAT_ASTC_4x4_SRGB_BLOCK, | 168 | VK_FORMAT_ASTC_4x4_SRGB_BLOCK, |
| 165 | VK_FORMAT_ASTC_8x8_SRGB_BLOCK, | 169 | VK_FORMAT_ASTC_5x4_UNORM_BLOCK, |
| 166 | VK_FORMAT_ASTC_8x5_SRGB_BLOCK, | ||
| 167 | VK_FORMAT_ASTC_5x4_SRGB_BLOCK, | 170 | VK_FORMAT_ASTC_5x4_SRGB_BLOCK, |
| 168 | VK_FORMAT_ASTC_5x5_UNORM_BLOCK, | 171 | VK_FORMAT_ASTC_5x5_UNORM_BLOCK, |
| 169 | VK_FORMAT_ASTC_5x5_SRGB_BLOCK, | 172 | VK_FORMAT_ASTC_5x5_SRGB_BLOCK, |
| 170 | VK_FORMAT_ASTC_10x8_UNORM_BLOCK, | 173 | VK_FORMAT_ASTC_6x5_UNORM_BLOCK, |
| 171 | VK_FORMAT_ASTC_10x8_SRGB_BLOCK, | 174 | VK_FORMAT_ASTC_6x5_SRGB_BLOCK, |
| 172 | VK_FORMAT_ASTC_6x6_UNORM_BLOCK, | 175 | VK_FORMAT_ASTC_6x6_UNORM_BLOCK, |
| 173 | VK_FORMAT_ASTC_6x6_SRGB_BLOCK, | 176 | VK_FORMAT_ASTC_6x6_SRGB_BLOCK, |
| 177 | VK_FORMAT_ASTC_8x5_UNORM_BLOCK, | ||
| 178 | VK_FORMAT_ASTC_8x5_SRGB_BLOCK, | ||
| 179 | VK_FORMAT_ASTC_8x6_UNORM_BLOCK, | ||
| 180 | VK_FORMAT_ASTC_8x6_SRGB_BLOCK, | ||
| 181 | VK_FORMAT_ASTC_8x8_UNORM_BLOCK, | ||
| 182 | VK_FORMAT_ASTC_8x8_SRGB_BLOCK, | ||
| 183 | VK_FORMAT_ASTC_10x5_UNORM_BLOCK, | ||
| 184 | VK_FORMAT_ASTC_10x5_SRGB_BLOCK, | ||
| 185 | VK_FORMAT_ASTC_10x6_UNORM_BLOCK, | ||
| 186 | VK_FORMAT_ASTC_10x6_SRGB_BLOCK, | ||
| 187 | VK_FORMAT_ASTC_10x8_UNORM_BLOCK, | ||
| 188 | VK_FORMAT_ASTC_10x8_SRGB_BLOCK, | ||
| 174 | VK_FORMAT_ASTC_10x10_UNORM_BLOCK, | 189 | VK_FORMAT_ASTC_10x10_UNORM_BLOCK, |
| 175 | VK_FORMAT_ASTC_10x10_SRGB_BLOCK, | 190 | VK_FORMAT_ASTC_10x10_SRGB_BLOCK, |
| 191 | VK_FORMAT_ASTC_12x10_UNORM_BLOCK, | ||
| 192 | VK_FORMAT_ASTC_12x10_SRGB_BLOCK, | ||
| 176 | VK_FORMAT_ASTC_12x12_UNORM_BLOCK, | 193 | VK_FORMAT_ASTC_12x12_UNORM_BLOCK, |
| 177 | VK_FORMAT_ASTC_12x12_SRGB_BLOCK, | 194 | VK_FORMAT_ASTC_12x12_SRGB_BLOCK, |
| 178 | VK_FORMAT_ASTC_8x6_UNORM_BLOCK, | 195 | VK_FORMAT_ASTC_8x6_UNORM_BLOCK, |
| @@ -190,17 +207,14 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( | |||
| 190 | 207 | ||
| 191 | } // Anonymous namespace | 208 | } // Anonymous namespace |
| 192 | 209 | ||
| 193 | VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_, | 210 | Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, |
| 194 | VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) | 211 | const vk::InstanceDispatch& dld_) |
| 195 | : dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, | 212 | : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, |
| 196 | instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} { | 213 | format_properties{GetFormatProperties(physical, dld)} { |
| 214 | CheckSuitability(); | ||
| 197 | SetupFamilies(surface); | 215 | SetupFamilies(surface); |
| 198 | SetupFeatures(); | 216 | SetupFeatures(); |
| 199 | } | ||
| 200 | 217 | ||
| 201 | VKDevice::~VKDevice() = default; | ||
| 202 | |||
| 203 | bool VKDevice::Create() { | ||
| 204 | const auto queue_cis = GetDeviceQueueCreateInfos(); | 218 | const auto queue_cis = GetDeviceQueueCreateInfos(); |
| 205 | const std::vector extensions = LoadExtensions(); | 219 | const std::vector extensions = LoadExtensions(); |
| 206 | 220 | ||
| @@ -214,7 +228,7 @@ bool VKDevice::Create() { | |||
| 214 | features2.features = { | 228 | features2.features = { |
| 215 | .robustBufferAccess = false, | 229 | .robustBufferAccess = false, |
| 216 | .fullDrawIndexUint32 = false, | 230 | .fullDrawIndexUint32 = false, |
| 217 | .imageCubeArray = false, | 231 | .imageCubeArray = true, |
| 218 | .independentBlend = true, | 232 | .independentBlend = true, |
| 219 | .geometryShader = true, | 233 | .geometryShader = true, |
| 220 | .tessellationShader = true, | 234 | .tessellationShader = true, |
| @@ -242,7 +256,7 @@ bool VKDevice::Create() { | |||
| 242 | .shaderTessellationAndGeometryPointSize = false, | 256 | .shaderTessellationAndGeometryPointSize = false, |
| 243 | .shaderImageGatherExtended = true, | 257 | .shaderImageGatherExtended = true, |
| 244 | .shaderStorageImageExtendedFormats = false, | 258 | .shaderStorageImageExtendedFormats = false, |
| 245 | .shaderStorageImageMultisample = false, | 259 | .shaderStorageImageMultisample = true, |
| 246 | .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, | 260 | .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, |
| 247 | .shaderStorageImageWriteWithoutFormat = true, | 261 | .shaderStorageImageWriteWithoutFormat = true, |
| 248 | .shaderUniformBufferArrayDynamicIndexing = false, | 262 | .shaderUniformBufferArrayDynamicIndexing = false, |
| @@ -268,7 +282,6 @@ bool VKDevice::Create() { | |||
| 268 | .variableMultisampleRate = false, | 282 | .variableMultisampleRate = false, |
| 269 | .inheritedQueries = false, | 283 | .inheritedQueries = false, |
| 270 | }; | 284 | }; |
| 271 | |||
| 272 | VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ | 285 | VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ |
| 273 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, | 286 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, |
| 274 | .pNext = nullptr, | 287 | .pNext = nullptr, |
| @@ -380,13 +393,27 @@ bool VKDevice::Create() { | |||
| 380 | LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); | 393 | LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); |
| 381 | } | 394 | } |
| 382 | 395 | ||
| 396 | VkPhysicalDeviceRobustness2FeaturesEXT robustness2; | ||
| 397 | if (ext_robustness2) { | ||
| 398 | robustness2 = { | ||
| 399 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, | ||
| 400 | .pNext = nullptr, | ||
| 401 | .robustBufferAccess2 = false, | ||
| 402 | .robustImageAccess2 = true, | ||
| 403 | .nullDescriptor = true, | ||
| 404 | }; | ||
| 405 | SetNext(next, robustness2); | ||
| 406 | } else { | ||
| 407 | LOG_INFO(Render_Vulkan, "Device doesn't support robustness2"); | ||
| 408 | } | ||
| 409 | |||
| 383 | if (!ext_depth_range_unrestricted) { | 410 | if (!ext_depth_range_unrestricted) { |
| 384 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); | 411 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); |
| 385 | } | 412 | } |
| 386 | 413 | ||
| 387 | VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; | 414 | VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; |
| 388 | if (nv_device_diagnostics_config) { | 415 | if (nv_device_diagnostics_config) { |
| 389 | nsight_aftermath_tracker.Initialize(); | 416 | nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>(); |
| 390 | 417 | ||
| 391 | diagnostics_nv = { | 418 | diagnostics_nv = { |
| 392 | .sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV, | 419 | .sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV, |
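The robustness2 block above grows the device-creation pNext chain through the SetNext helper. Reconstructed as a standalone sketch; the helper in the tree may differ in detail, but this is the common shape of the technique:

    #include <vulkan/vulkan.h>

    template <typename T>
    void SetNext(void**& next, T& data) {
        *next = &data;      // link the new struct onto the chain's tail
        next = &data.pNext; // the tail is now the new struct's pNext
    }

    void BuildChain() {
        VkPhysicalDeviceFeatures2 features2{};
        features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
        void** next = &features2.pNext;

        VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
        robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
        SetNext(next, robustness2); // features2.pNext now points at robustness2
    }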
| @@ -397,15 +424,17 @@ bool VKDevice::Create() { | |||
| 397 | }; | 424 | }; |
| 398 | first_next = &diagnostics_nv; | 425 | first_next = &diagnostics_nv; |
| 399 | } | 426 | } |
| 400 | |||
| 401 | logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); | 427 | logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); |
| 402 | if (!logical) { | ||
| 403 | LOG_ERROR(Render_Vulkan, "Failed to create logical device"); | ||
| 404 | return false; | ||
| 405 | } | ||
| 406 | 428 | ||
| 407 | CollectTelemetryParameters(); | 429 | CollectTelemetryParameters(); |
| 430 | CollectToolingInfo(); | ||
| 408 | 431 | ||
| 432 | if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { | ||
| 433 | LOG_WARNING( | ||
| 434 | Render_Vulkan, | ||
| 435 | "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); | ||
| 436 | ext_extended_dynamic_state = false; | ||
| 437 | } | ||
| 409 | if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) { | 438 | if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) { |
| 410 | // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it | 439 | // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it |
| 411 | // seems to cause stability issues | 440 | // seems to cause stability issues |
| @@ -419,11 +448,12 @@ bool VKDevice::Create() { | |||
| 419 | present_queue = logical.GetQueue(present_family); | 448 | present_queue = logical.GetQueue(present_family); |
| 420 | 449 | ||
| 421 | use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); | 450 | use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); |
| 422 | return true; | ||
| 423 | } | 451 | } |
| 424 | 452 | ||
| 425 | VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, | 453 | Device::~Device() = default; |
| 426 | FormatType format_type) const { | 454 | |
| 455 | VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, | ||
| 456 | FormatType format_type) const { | ||
| 427 | if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { | 457 | if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { |
| 428 | return wanted_format; | 458 | return wanted_format; |
| 429 | } | 459 | } |
| @@ -454,18 +484,20 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla | |||
| 454 | return wanted_format; | 484 | return wanted_format; |
| 455 | } | 485 | } |
| 456 | 486 | ||
| 457 | void VKDevice::ReportLoss() const { | 487 | void Device::ReportLoss() const { |
| 458 | LOG_CRITICAL(Render_Vulkan, "Device loss occurred!"); | 488 | LOG_CRITICAL(Render_Vulkan, "Device loss occurred!"); |
| 459 | 489 | ||
| 460 | // Wait for the log to flush and for Nsight Aftermath to dump the results | 490 | // Wait for the log to flush and for Nsight Aftermath to dump the results |
| 461 | std::this_thread::sleep_for(std::chrono::seconds{3}); | 491 | std::this_thread::sleep_for(std::chrono::seconds{15}); |
| 462 | } | 492 | } |
| 463 | 493 | ||
| 464 | void VKDevice::SaveShader(const std::vector<u32>& spirv) const { | 494 | void Device::SaveShader(const std::vector<u32>& spirv) const { |
| 465 | nsight_aftermath_tracker.SaveShader(spirv); | 495 | if (nsight_aftermath_tracker) { |
| 496 | nsight_aftermath_tracker->SaveShader(spirv); | ||
| 497 | } | ||
| 466 | } | 498 | } |
| 467 | 499 | ||
| 468 | bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { | 500 | bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { |
| 469 | // Disable for now to avoid converting ASTC twice. | 501 | // Disable for now to avoid converting ASTC twice. |
| 470 | static constexpr std::array astc_formats = { | 502 | static constexpr std::array astc_formats = { |
| 471 | VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, | 503 | VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, |
| @@ -499,8 +531,18 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) | |||
| 499 | return true; | 531 | return true; |
| 500 | } | 532 | } |
| 501 | 533 | ||
| 502 | bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, | 534 | bool Device::TestDepthStencilBlits() const { |
| 503 | FormatType format_type) const { | 535 | static constexpr VkFormatFeatureFlags required_features = |
| 536 | VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; | ||
| 537 | const auto test_features = [](VkFormatProperties props) { | ||
| 538 | return (props.optimalTilingFeatures & required_features) == required_features; | ||
| 539 | }; | ||
| 540 | return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) && | ||
| 541 | test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT)); | ||
| 542 | } | ||
| 543 | |||
| 544 | bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, | ||
| 545 | FormatType format_type) const { | ||
| 504 | const auto it = format_properties.find(wanted_format); | 546 | const auto it = format_properties.find(wanted_format); |
| 505 | if (it == format_properties.end()) { | 547 | if (it == format_properties.end()) { |
| 506 | UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format); | 548 | UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format); |
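TestDepthStencilBlits above answers a single question: can both packed depth-stencil formats be blitted in either direction under optimal tiling? The same probe written against raw Vulkan so it stands alone; the patch routes it through the cached format_properties map instead:

    #include <initializer_list>
    #include <vulkan/vulkan.h>

    bool SupportsDepthStencilBlits(VkPhysicalDevice physical) {
        constexpr VkFormatFeatureFlags required =
            VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
        for (const VkFormat format :
             {VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D24_UNORM_S8_UINT}) {
            VkFormatProperties properties;
            vkGetPhysicalDeviceFormatProperties(physical, format, &properties);
            if ((properties.optimalTilingFeatures & required) != required) {
                return false; // both formats must blit both ways
            }
        }
        return true;
    }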
| @@ -510,65 +552,47 @@ bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wa | |||
| 510 | return (supported_usage & wanted_usage) == wanted_usage; | 552 | return (supported_usage & wanted_usage) == wanted_usage; |
| 511 | } | 553 | } |
| 512 | 554 | ||
| 513 | bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { | 555 | void Device::CheckSuitability() const { |
| 514 | bool is_suitable = true; | ||
| 515 | std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; | 556 | std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; |
| 516 | 557 | for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) { | |
| 517 | for (const auto& prop : physical.EnumerateDeviceExtensionProperties()) { | ||
| 518 | for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { | 558 | for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { |
| 519 | if (available_extensions[i]) { | 559 | if (available_extensions[i]) { |
| 520 | continue; | 560 | continue; |
| 521 | } | 561 | } |
| 522 | const std::string_view name{prop.extensionName}; | 562 | const std::string_view name{property.extensionName}; |
| 523 | available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; | 563 | available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; |
| 524 | } | 564 | } |
| 525 | } | 565 | } |
| 526 | if (!available_extensions.all()) { | 566 | for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { |
| 527 | for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { | 567 | if (available_extensions[i]) { |
| 528 | if (available_extensions[i]) { | ||
| 529 | continue; | ||
| 530 | } | ||
| 531 | LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); | ||
| 532 | is_suitable = false; | ||
| 533 | } | ||
| 534 | } | ||
| 535 | |||
| 536 | bool has_graphics{}, has_present{}; | ||
| 537 | const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); | ||
| 538 | for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { | ||
| 539 | const auto& family = queue_family_properties[i]; | ||
| 540 | if (family.queueCount == 0) { | ||
| 541 | continue; | 568 | continue; |
| 542 | } | 569 | } |
| 543 | has_graphics |= family.queueFlags & VK_QUEUE_GRAPHICS_BIT; | 570 | LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); |
| 544 | has_present |= physical.GetSurfaceSupportKHR(i, surface); | 571 | throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); |
| 545 | } | 572 | } |
| 546 | if (!has_graphics || !has_present) { | 573 | struct LimitTuple { |
| 547 | LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue"); | 574 | u32 minimum; |
| 548 | is_suitable = false; | 575 | u32 value; |
| 549 | } | 576 | const char* name; |
| 550 | 577 | }; | |
| 551 | // TODO(Rodrigo): Check if the device matches all requeriments. | 578 | const VkPhysicalDeviceLimits& limits{properties.limits}; |
| 552 | const auto properties{physical.GetProperties()}; | 579 | const std::array limits_report{ |
| 553 | const auto& limits{properties.limits}; | 580 | LimitTuple{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"}, |
| 554 | 581 | LimitTuple{16, limits.maxViewports, "maxViewports"}, | |
| 555 | constexpr u32 required_ubo_size = 65536; | 582 | LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"}, |
| 556 | if (limits.maxUniformBufferRange < required_ubo_size) { | 583 | LimitTuple{8, limits.maxClipDistances, "maxClipDistances"}, |
| 557 | LOG_ERROR(Render_Vulkan, "Device UBO size {} is too small, {} is required", | 584 | }; |
| 558 | limits.maxUniformBufferRange, required_ubo_size); | 585 | for (const auto& tuple : limits_report) { |
| 559 | is_suitable = false; | 586 | if (tuple.value < tuple.minimum) { |
| 560 | } | 587 | LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", tuple.name, |
| 561 | 588 | tuple.minimum, tuple.value); | |
| 562 | constexpr u32 required_num_viewports = 16; | 589 | throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); |
| 563 | if (limits.maxViewports < required_num_viewports) { | 590 | } |
| 564 | LOG_INFO(Render_Vulkan, "Device number of viewports {} is too small, {} is required", | ||
| 565 | limits.maxViewports, required_num_viewports); | ||
| 566 | is_suitable = false; | ||
| 567 | } | 591 | } |
| 568 | 592 | const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; | |
| 569 | const auto features{physical.GetFeatures()}; | 593 | const std::array feature_report{ |
| 570 | const std::array feature_report = { | ||
| 571 | std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), | 594 | std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), |
| 595 | std::make_pair(features.imageCubeArray, "imageCubeArray"), | ||
| 572 | std::make_pair(features.independentBlend, "independentBlend"), | 596 | std::make_pair(features.independentBlend, "independentBlend"), |
| 573 | std::make_pair(features.depthClamp, "depthClamp"), | 597 | std::make_pair(features.depthClamp, "depthClamp"), |
| 574 | std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), | 598 | std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), |
| @@ -580,25 +604,20 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { | |||
| 580 | std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), | 604 | std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), |
| 581 | std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), | 605 | std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), |
| 582 | std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), | 606 | std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), |
| 607 | std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"), | ||
| 583 | std::make_pair(features.shaderStorageImageWriteWithoutFormat, | 608 | std::make_pair(features.shaderStorageImageWriteWithoutFormat, |
| 584 | "shaderStorageImageWriteWithoutFormat"), | 609 | "shaderStorageImageWriteWithoutFormat"), |
| 585 | }; | 610 | }; |
| 586 | for (const auto& [supported, name] : feature_report) { | 611 | for (const auto& [is_supported, name] : feature_report) { |
| 587 | if (supported) { | 612 | if (is_supported) { |
| 588 | continue; | 613 | continue; |
| 589 | } | 614 | } |
| 590 | LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name); | 615 | LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name); |
| 591 | is_suitable = false; | 616 | throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); |
| 592 | } | 617 | } |
| 593 | |||
| 594 | if (!is_suitable) { | ||
| 595 | LOG_ERROR(Render_Vulkan, "{} is not suitable", properties.deviceName); | ||
| 596 | } | ||
| 597 | |||
| 598 | return is_suitable; | ||
| 599 | } | 618 | } |
| 600 | 619 | ||
| 601 | std::vector<const char*> VKDevice::LoadExtensions() { | 620 | std::vector<const char*> Device::LoadExtensions() { |
| 602 | std::vector<const char*> extensions; | 621 | std::vector<const char*> extensions; |
| 603 | extensions.reserve(7 + REQUIRED_EXTENSIONS.size()); | 622 | extensions.reserve(7 + REQUIRED_EXTENSIONS.size()); |
| 604 | extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); | 623 | extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); |
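CheckSuitability above replaces the old bool-returning IsSuitable: the first missing extension, limit, or feature throws vk::Exception out of the constructor, so a half-initialized Device can never exist. A hypothetical caller shape (not code from this patch) that turns the exception back into a skip:

    #include <memory>

    #include "video_core/vulkan_common/vulkan_device.h"

    std::unique_ptr<Device> TryCreateDevice(VkInstance instance,
                                            vk::PhysicalDevice physical,
                                            VkSurfaceKHR surface,
                                            const vk::InstanceDispatch& dld) {
        try {
            return std::make_unique<Device>(instance, physical, surface, dld);
        } catch (const vk::Exception&) {
            return nullptr; // an unsuitable device is skipped, not fatal
        }
    }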
| @@ -608,6 +627,7 @@ std::vector<const char*> VKDevice::LoadExtensions() { | |||
| 608 | bool has_ext_transform_feedback{}; | 627 | bool has_ext_transform_feedback{}; |
| 609 | bool has_ext_custom_border_color{}; | 628 | bool has_ext_custom_border_color{}; |
| 610 | bool has_ext_extended_dynamic_state{}; | 629 | bool has_ext_extended_dynamic_state{}; |
| 630 | bool has_ext_robustness2{}; | ||
| 611 | for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { | 631 | for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { |
| 612 | const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, | 632 | const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, |
| 613 | bool push) { | 633 | bool push) { |
| @@ -627,14 +647,16 @@ std::vector<const char*> VKDevice::LoadExtensions() { | |||
| 627 | test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); | 647 | test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); |
| 628 | test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); | 648 | test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); |
| 629 | test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); | 649 | test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); |
| 650 | test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true); | ||
| 630 | test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, | 651 | test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, |
| 631 | true); | 652 | true); |
| 653 | test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); | ||
| 654 | test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); | ||
| 632 | test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); | 655 | test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); |
| 633 | test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); | 656 | test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); |
| 634 | test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); | 657 | test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); |
| 635 | if (instance_version >= VK_API_VERSION_1_1) { | 658 | test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false); |
| 636 | test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); | 659 | test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); |
| 637 | } | ||
| 638 | if (Settings::values.renderer_debug) { | 660 | if (Settings::values.renderer_debug) { |
| 639 | test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, | 661 | test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, |
| 640 | true); | 662 | true); |
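The test() lambda used above makes one pass over the device's extension list, flipping a feature flag and optionally queuing the name for enabling. Its shape, reconstructed as a freestanding sketch with one representative extension:

    #include <cstring>
    #include <functional>
    #include <optional>
    #include <vector>
    #include <vulkan/vulkan.h>

    void ScanExtensions(const std::vector<VkExtensionProperties>& available,
                        std::vector<const char*>& enabled, bool& has_uint8) {
        for (const VkExtensionProperties& extension : available) {
            const auto test = [&](std::optional<std::reference_wrapper<bool>> status,
                                  const char* name, bool push) {
                if (std::strcmp(extension.extensionName, name) != 0) {
                    return;
                }
                if (status) {
                    status->get() = true;    // record that the device offers it
                }
                if (push) {
                    enabled.push_back(name); // and request it at device creation
                }
            };
            test(has_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
        }
    }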
| @@ -733,41 +755,60 @@ std::vector<const char*> VKDevice::LoadExtensions() { | |||
| 733 | } | 755 | } |
| 734 | } | 756 | } |
| 735 | 757 | ||
| 758 | if (has_ext_robustness2) { | ||
| 759 | VkPhysicalDeviceRobustness2FeaturesEXT robustness2; | ||
| 760 | robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; | ||
| 761 | robustness2.pNext = nullptr; | ||
| 762 | features.pNext = &robustness2; | ||
| 763 | physical.GetFeatures2KHR(features); | ||
| 764 | if (robustness2.nullDescriptor && robustness2.robustImageAccess2) { | ||
| 765 | extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); | ||
| 766 | ext_robustness2 = true; | ||
| 767 | } | ||
| 768 | } | ||
| 769 | |||
| 736 | return extensions; | 770 | return extensions; |
| 737 | } | 771 | } |
| 738 | 772 | ||
| 739 | void VKDevice::SetupFamilies(VkSurfaceKHR surface) { | 773 | void Device::SetupFamilies(VkSurfaceKHR surface) { |
| 740 | std::optional<u32> graphics_family_, present_family_; | ||
| 741 | |||
| 742 | const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); | 774 | const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); |
| 743 | for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { | 775 | std::optional<u32> graphics; |
| 744 | if (graphics_family_ && present_family_) | 776 | std::optional<u32> present; |
| 777 | for (u32 index = 0; index < static_cast<u32>(queue_family_properties.size()); ++index) { | ||
| 778 | if (graphics && (present || !surface)) { | ||
| 745 | break; | 779 | break; |
| 746 | 780 | } | |
| 747 | const auto& queue_family = queue_family_properties[i]; | 781 | const VkQueueFamilyProperties& queue_family = queue_family_properties[index]; |
| 748 | if (queue_family.queueCount == 0) | 782 | if (queue_family.queueCount == 0) { |
| 749 | continue; | 783 | continue; |
| 750 | 784 | } | |
| 751 | if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) { | 785 | if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) { |
| 752 | graphics_family_ = i; | 786 | graphics = index; |
| 753 | } | 787 | } |
| 754 | if (physical.GetSurfaceSupportKHR(i, surface)) { | 788 | if (surface && physical.GetSurfaceSupportKHR(index, surface)) { |
| 755 | present_family_ = i; | 789 | present = index; |
| 756 | } | 790 | } |
| 757 | } | 791 | } |
| 758 | ASSERT(graphics_family_ && present_family_); | 792 | if (!graphics) { |
| 759 | 793 | LOG_ERROR(Render_Vulkan, "Device lacks a graphics queue"); | |
| 760 | graphics_family = *graphics_family_; | 794 | throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); |
| 761 | present_family = *present_family_; | 795 | } |
| 796 | if (surface && !present) { | ||
| 797 | LOG_ERROR(Render_Vulkan, "Device lacks a present queue"); | ||
| 798 | throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); | ||
| 799 | } | ||
| 800 | graphics_family = *graphics; | ||
| 801 | present_family = present.value_or(0); // headless: no present queue, keep a placeholder | ||
| 762 | } | 802 | } |
| 763 | 803 | ||
| 764 | void VKDevice::SetupFeatures() { | 804 | void Device::SetupFeatures() { |
| 765 | const auto supported_features{physical.GetFeatures()}; | 805 | const auto supported_features{physical.GetFeatures()}; |
| 766 | is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; | 806 | is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; |
| 807 | is_blit_depth_stencil_supported = TestDepthStencilBlits(); | ||
| 767 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); | 808 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); |
| 768 | } | 809 | } |
| 769 | 810 | ||
| 770 | void VKDevice::CollectTelemetryParameters() { | 811 | void Device::CollectTelemetryParameters() { |
| 771 | VkPhysicalDeviceDriverPropertiesKHR driver{ | 812 | VkPhysicalDeviceDriverPropertiesKHR driver{ |
| 772 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR, | 813 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR, |
| 773 | .pNext = nullptr, | 814 | .pNext = nullptr, |
| @@ -794,7 +835,33 @@ void VKDevice::CollectTelemetryParameters() { | |||
| 794 | } | 835 | } |
| 795 | } | 836 | } |
| 796 | 837 | ||
| 797 | std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { | 838 | void Device::CollectToolingInfo() { |
| 839 | if (!ext_tooling_info) { | ||
| 840 | return; | ||
| 841 | } | ||
| 842 | const auto vkGetPhysicalDeviceToolPropertiesEXT = | ||
| 843 | reinterpret_cast<PFN_vkGetPhysicalDeviceToolPropertiesEXT>( | ||
| 844 | dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT")); | ||
| 845 | if (!vkGetPhysicalDeviceToolPropertiesEXT) { | ||
| 846 | return; | ||
| 847 | } | ||
| 848 | u32 tool_count = 0; | ||
| 849 | if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) { | ||
| 850 | return; | ||
| 851 | } | ||
| 852 | std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(tool_count); | ||
| 853 | if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) { | ||
| 854 | return; | ||
| 855 | } | ||
| 856 | for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) { | ||
| 857 | const std::string_view name = tool.name; | ||
| 858 | LOG_INFO(Render_Vulkan, "Attached tool: {}", name); | ||
| 859 | has_renderdoc = has_renderdoc || name == "RenderDoc"; | ||
| 860 | has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics"; | ||
| 861 | } | ||
| 862 | } | ||
| 863 | |||
| 864 | std::vector<VkDeviceQueueCreateInfo> Device::GetDeviceQueueCreateInfos() const { | ||
| 798 | static constexpr float QUEUE_PRIORITY = 1.0f; | 865 | static constexpr float QUEUE_PRIORITY = 1.0f; |
| 799 | 866 | ||
| 800 | std::unordered_set<u32> unique_queue_families{graphics_family, present_family}; | 867 | std::unordered_set<u32> unique_queue_families{graphics_family, present_family}; |
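CollectToolingInfo uses the classic two-call Vulkan enumeration: ask for the count, size the vector, then ask again for the data. The same pattern as a freestanding sketch, with the entry point passed in rather than fetched through the dispatch table:

    #include <string_view>
    #include <vector>
    #include <vulkan/vulkan.h>

    bool IsRenderDocAttached(VkPhysicalDevice physical,
                             PFN_vkGetPhysicalDeviceToolPropertiesEXT get_tools) {
        uint32_t count = 0;
        if (!get_tools || get_tools(physical, &count, nullptr) != VK_SUCCESS) {
            return false; // extension absent or query failed: assume no tools
        }
        std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(count);
        if (get_tools(physical, &count, tools.data()) != VK_SUCCESS) {
            return false;
        }
        for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) {
            if (std::string_view{tool.name} == "RenderDoc") {
                return true;
            }
        }
        return false;
    }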
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4286673d9..a973c3ce4 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -10,11 +10,12 @@ | |||
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | 11 | ||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/renderer_vulkan/nsight_aftermath_tracker.h" | 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 14 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 15 | 14 | ||
| 16 | namespace Vulkan { | 15 | namespace Vulkan { |
| 17 | 16 | ||
| 17 | class NsightAftermathTracker; | ||
| 18 | |||
| 18 | /// Format usage descriptor. | 19 | /// Format usage descriptor. |
| 19 | enum class FormatType { Linear, Optimal, Buffer }; | 20 | enum class FormatType { Linear, Optimal, Buffer }; |
| 20 | 21 | ||
| @@ -22,14 +23,11 @@ enum class FormatType { Linear, Optimal, Buffer }; | |||
| 22 | const u32 GuestWarpSize = 32; | 23 | const u32 GuestWarpSize = 32; |
| 23 | 24 | ||
| 24 | /// Handles data specific to a physical device. | 25 | /// Handles data specific to a physical device. |
| 25 | class VKDevice final { | 26 | class Device final { |
| 26 | public: | 27 | public: |
| 27 | explicit VKDevice(VkInstance instance, u32 instance_version, vk::PhysicalDevice physical, | 28 | explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, |
| 28 | VkSurfaceKHR surface, const vk::InstanceDispatch& dld); | 29 | const vk::InstanceDispatch& dld); |
| 29 | ~VKDevice(); | 30 | ~Device(); |
| 30 | |||
| 31 | /// Initializes the device. Returns true on success. | ||
| 32 | bool Create(); | ||
| 33 | 31 | ||
| 34 | /** | 32 | /** |
| 35 | * Returns a format supported by the device for the passed requirements. | 33 |
| @@ -82,11 +80,6 @@ public: | |||
| 82 | return present_family; | 80 | return present_family; |
| 83 | } | 81 | } |
| 84 | 82 | ||
| 85 | /// Returns the current instance Vulkan API version in Vulkan-formatted version numbers. | ||
| 86 | u32 InstanceApiVersion() const { | ||
| 87 | return instance_version; | ||
| 88 | } | ||
| 89 | |||
| 90 | /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. | 83 | /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. |
| 91 | u32 ApiVersion() const { | 84 | u32 ApiVersion() const { |
| 92 | return properties.apiVersion; | 85 | return properties.apiVersion; |
| @@ -157,6 +150,11 @@ public: | |||
| 157 | return is_formatless_image_load_supported; | 150 | return is_formatless_image_load_supported; |
| 158 | } | 151 | } |
| 159 | 152 | ||
| 153 | /// Returns true when blitting from and to depth stencil images is supported. | ||
| 154 | bool IsBlitDepthStencilSupported() const { | ||
| 155 | return is_blit_depth_stencil_supported; | ||
| 156 | } | ||
| 157 | |||
| 160 | /// Returns true if the device supports VK_NV_viewport_swizzle. | 158 | /// Returns true if the device supports VK_NV_viewport_swizzle. |
| 161 | bool IsNvViewportSwizzleSupported() const { | 159 | bool IsNvViewportSwizzleSupported() const { |
| 162 | return nv_viewport_swizzle; | 160 | return nv_viewport_swizzle; |
| @@ -172,6 +170,11 @@ public: | |||
| 172 | return ext_index_type_uint8; | 170 | return ext_index_type_uint8; |
| 173 | } | 171 | } |
| 174 | 172 | ||
| 173 | /// Returns true if the device supports VK_EXT_sampler_filter_minmax. | ||
| 174 | bool IsExtSamplerFilterMinmaxSupported() const { | ||
| 175 | return ext_sampler_filter_minmax; | ||
| 176 | } | ||
| 177 | |||
| 175 | /// Returns true if the device supports VK_EXT_depth_range_unrestricted. | 178 | /// Returns true if the device supports VK_EXT_depth_range_unrestricted. |
| 176 | bool IsExtDepthRangeUnrestrictedSupported() const { | 179 | bool IsExtDepthRangeUnrestrictedSupported() const { |
| 177 | return ext_depth_range_unrestricted; | 180 | return ext_depth_range_unrestricted; |
| @@ -197,6 +200,16 @@ public: | |||
| 197 | return ext_extended_dynamic_state; | 200 | return ext_extended_dynamic_state; |
| 198 | } | 201 | } |
| 199 | 202 | ||
| 203 | /// Returns true if the device supports VK_EXT_shader_stencil_export. | ||
| 204 | bool IsExtShaderStencilExportSupported() const { | ||
| 205 | return ext_shader_stencil_export; | ||
| 206 | } | ||
| 207 | |||
| 208 | /// Returns true when a known debugging tool is attached. | ||
| 209 | bool HasDebuggingToolAttached() const { | ||
| 210 | return has_renderdoc || has_nsight_graphics; | ||
| 211 | } | ||
| 212 | |||
| 200 | /// Returns the vendor name reported from Vulkan. | 213 | /// Returns the vendor name reported from Vulkan. |
| 201 | std::string_view GetVendorName() const { | 214 | std::string_view GetVendorName() const { |
| 202 | return vendor_name; | 215 | return vendor_name; |
| @@ -212,10 +225,10 @@ public: | |||
| 212 | return use_asynchronous_shaders; | 225 | return use_asynchronous_shaders; |
| 213 | } | 226 | } |
| 214 | 227 | ||
| 228 | private: | ||
| 215 | /// Checks if the physical device is suitable. | 229 | /// Checks if the physical device is suitable. |
| 216 | static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); | 230 | void CheckSuitability() const; |
| 217 | 231 | ||
| 218 | private: | ||
| 219 | /// Loads extensions into a vector and stores available ones in this object. | 232 | /// Loads extensions into a vector and stores available ones in this object. |
| 220 | std::vector<const char*> LoadExtensions(); | 233 | std::vector<const char*> LoadExtensions(); |
| 221 | 234 | ||
| @@ -228,16 +241,23 @@ private: | |||
| 228 | /// Collects telemetry information from the device. | 241 | /// Collects telemetry information from the device. |
| 229 | void CollectTelemetryParameters(); | 242 | void CollectTelemetryParameters(); |
| 230 | 243 | ||
| 244 | /// Collects information about attached tools. | ||
| 245 | void CollectToolingInfo(); | ||
| 246 | |||
| 231 | /// Returns a list of queue initialization descriptors. | 247 | /// Returns a list of queue initialization descriptors. |
| 232 | std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; | 248 | std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; |
| 233 | 249 | ||
| 234 | /// Returns true if ASTC textures are natively supported. | 250 | /// Returns true if ASTC textures are natively supported. |
| 235 | bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; | 251 | bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; |
| 236 | 252 | ||
| 253 | /// Returns true if the device natively supports blitting depth stencil images. | ||
| 254 | bool TestDepthStencilBlits() const; | ||
| 255 | |||
| 237 | /// Returns true if a format is supported. | 256 | /// Returns true if a format is supported. |
| 238 | bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, | 257 | bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, |
| 239 | FormatType format_type) const; | 258 | FormatType format_type) const; |
| 240 | 259 | ||
| 260 | VkInstance instance; ///< Vulkan instance. | ||
| 241 | vk::DeviceDispatch dld; ///< Device function pointers. | 261 | vk::DeviceDispatch dld; ///< Device function pointers. |
| 242 | vk::PhysicalDevice physical; ///< Physical device. | 262 | vk::PhysicalDevice physical; ///< Physical device. |
| 243 | VkPhysicalDeviceProperties properties; ///< Device properties. | 263 | VkPhysicalDeviceProperties properties; ///< Device properties. |
| @@ -253,15 +273,22 @@ private: | |||
| 253 | bool is_float16_supported{}; ///< Support for float16 arithmetics. | 273 | bool is_float16_supported{}; ///< Support for float16 arithmetics. |
| 254 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. | 274 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. |
| 255 | bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. | 275 | bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. |
| 276 | bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. | ||
| 256 | bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. | 277 | bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. |
| 257 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. | 278 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. |
| 258 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. | 279 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. |
| 280 | bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. | ||
| 259 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. | 281 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. |
| 260 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. | 282 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. |
| 283 | bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. | ||
| 261 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. | 284 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. |
| 262 | bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. | 285 | bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. |
| 263 | bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. | 286 | bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. |
| 287 | bool ext_robustness2{}; ///< Support for VK_EXT_robustness2. | ||
| 288 | bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. | ||
| 264 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. | 289 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. |
| 290 | bool has_renderdoc{}; ///< Has RenderDoc attached | ||
| 291 | bool has_nsight_graphics{}; ///< Has Nsight Graphics attached | ||
| 265 | 292 | ||
| 266 | // Asynchronous Graphics Pipeline setting | 293 | // Asynchronous Graphics Pipeline setting |
| 267 | bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline | 294 | bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline |
| @@ -274,7 +301,7 @@ private: | |||
| 274 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; | 301 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; |
| 275 | 302 | ||
| 276 | /// Nsight Aftermath GPU crash tracker | 303 | /// Nsight Aftermath GPU crash tracker |
| 277 | NsightAftermathTracker nsight_aftermath_tracker; | 304 | std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker; |
| 278 | }; | 305 | }; |
| 279 | 306 | ||
| 280 | } // namespace Vulkan | 307 | } // namespace Vulkan |
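The header-side counterpart of the tracker change: a forward declaration plus std::unique_ptr keeps nsight_aftermath_tracker.h out of every file that includes the device header, at the price of an out-of-line destructor (hence the Device::~Device() = default; in the .cpp). The trick in miniature:

    #include <memory>

    class NsightAftermathTracker; // a forward declaration is enough here

    class Device {
    public:
        Device();
        ~Device(); // defined out of line, where the tracker is a complete type

    private:
        std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker;
    };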
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp new file mode 100644 index 000000000..889ecda0c --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_instance.cpp | |||
| @@ -0,0 +1,151 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstring> | ||
| 7 | #include <optional> | ||
| 8 | #include <span> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/dynamic_library.h" | ||
| 13 | #include "common/logging/log.h" | ||
| 14 | #include "core/frontend/emu_window.h" | ||
| 15 | #include "video_core/vulkan_common/vulkan_instance.h" | ||
| 16 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 17 | |||
| 18 | // Include these late to avoid polluting previous headers | ||
| 19 | #ifdef _WIN32 | ||
| 20 | #include <windows.h> | ||
| 21 | // ensure include order | ||
| 22 | #include <vulkan/vulkan_win32.h> | ||
| 23 | #endif | ||
| 24 | |||
| 25 | #if !defined(_WIN32) && !defined(__APPLE__) | ||
| 26 | #include <X11/Xlib.h> | ||
| 27 | #include <vulkan/vulkan_wayland.h> | ||
| 28 | #include <vulkan/vulkan_xlib.h> | ||
| 29 | #endif | ||
| 30 | |||
| 31 | namespace Vulkan { | ||
| 32 | namespace { | ||
| 33 | [[nodiscard]] std::vector<const char*> RequiredExtensions( | ||
| 34 | Core::Frontend::WindowSystemType window_type, bool enable_debug_utils) { | ||
| 35 | std::vector<const char*> extensions; | ||
| 36 | extensions.reserve(6); | ||
| 37 | switch (window_type) { | ||
| 38 | case Core::Frontend::WindowSystemType::Headless: | ||
| 39 | break; | ||
| 40 | #ifdef _WIN32 | ||
| 41 | case Core::Frontend::WindowSystemType::Windows: | ||
| 42 | extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); | ||
| 43 | break; | ||
| 44 | #endif | ||
| 45 | #if !defined(_WIN32) && !defined(__APPLE__) | ||
| 46 | case Core::Frontend::WindowSystemType::X11: | ||
| 47 | extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); | ||
| 48 | break; | ||
| 49 | case Core::Frontend::WindowSystemType::Wayland: | ||
| 50 | extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); | ||
| 51 | break; | ||
| 52 | #endif | ||
| 53 | default: | ||
| 54 | LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); | ||
| 55 | break; | ||
| 56 | } | ||
| 57 | if (window_type != Core::Frontend::WindowSystemType::Headless) { | ||
| 58 | extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); | ||
| 59 | } | ||
| 60 | if (enable_debug_utils) { | ||
| 61 | extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); | ||
| 62 | } | ||
| 63 | extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); | ||
| 64 | return extensions; | ||
| 65 | } | ||
| 66 | |||
| 67 | [[nodiscard]] bool AreExtensionsSupported(const vk::InstanceDispatch& dld, | ||
| 68 | std::span<const char* const> extensions) { | ||
| 69 | const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); | ||
| 70 | if (!properties) { | ||
| 71 | LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); | ||
| 72 | return false; | ||
| 73 | } | ||
| 74 | for (const char* extension : extensions) { | ||
| 75 | const auto it = std::ranges::find_if(*properties, [extension](const auto& prop) { | ||
| 76 | return std::strcmp(extension, prop.extensionName) == 0; | ||
| 77 | }); | ||
| 78 | if (it == properties->end()) { | ||
| 79 | LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); | ||
| 80 | return false; | ||
| 81 | } | ||
| 82 | } | ||
| 83 | return true; | ||
| 84 | } | ||
| 85 | |||
| 86 | [[nodiscard]] std::vector<const char*> Layers(bool enable_layers) { | ||
| 87 | std::vector<const char*> layers; | ||
| 88 | if (enable_layers) { | ||
| 89 | layers.push_back("VK_LAYER_KHRONOS_validation"); | ||
| 90 | } | ||
| 91 | return layers; | ||
| 92 | } | ||
| 93 | |||
| 94 | void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector<const char*>& layers) { | ||
| 95 | const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld); | ||
| 96 | if (!layer_properties) { | ||
| 97 | LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers"); | ||
| 98 | layers.clear(); | ||
| 99 | } | ||
| 100 | std::erase_if(layers, [&layer_properties](const char* layer) { | ||
| 101 | const auto comp = [layer](const VkLayerProperties& layer_property) { | ||
| 102 | return std::strcmp(layer, layer_property.layerName) == 0; | ||
| 103 | }; | ||
| 104 | const auto it = std::ranges::find_if(*layer_properties, comp); | ||
| 105 | if (it == layer_properties->end()) { | ||
| 106 | LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer); | ||
| 107 | return true; | ||
| 108 | } | ||
| 109 | return false; | ||
| 110 | }); | ||
| 111 | } | ||
| 112 | } // Anonymous namespace | ||
| 113 | |||
| 114 | vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, | ||
| 115 | u32 required_version, Core::Frontend::WindowSystemType window_type, | ||
| 116 | bool enable_debug_utils, bool enable_layers) { | ||
| 117 | if (!library.IsOpen()) { | ||
| 118 | LOG_ERROR(Render_Vulkan, "Vulkan library not available"); | ||
| 119 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||
| 120 | } | ||
| 121 | if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) { | ||
| 122 | LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan"); | ||
| 123 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||
| 124 | } | ||
| 125 | if (!vk::Load(dld)) { | ||
| 126 | LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); | ||
| 127 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||
| 128 | } | ||
| 129 | const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_debug_utils); | ||
| 130 | if (!AreExtensionsSupported(dld, extensions)) { | ||
| 131 | throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); | ||
| 132 | } | ||
| 133 | std::vector<const char*> layers = Layers(enable_layers); | ||
| 134 | RemoveUnavailableLayers(dld, layers); | ||
| 135 | |||
| 136 | const u32 available_version = vk::AvailableVersion(dld); | ||
| 137 | if (available_version < required_version) { | ||
| 138 | LOG_ERROR(Render_Vulkan, "Vulkan {}.{} is not supported, {}.{} is required", | ||
| 139 | VK_VERSION_MAJOR(available_version), VK_VERSION_MINOR(available_version), | ||
| 140 | VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version)); | ||
| 141 | throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); | ||
| 142 | } | ||
| 143 | vk::Instance instance = vk::Instance::Create(required_version, layers, extensions, dld); | ||
| 144 | if (!vk::Load(*instance, dld)) { | ||
| 145 | LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); | ||
| 146 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||
| 147 | } | ||
| 148 | return instance; | ||
| 149 | } | ||
| 150 | |||
| 151 | } // namespace Vulkan | ||
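Since CreateInstance now reports failure by throwing, a probe-style caller collapses to a single try block; a hypothetical example using the header's headless defaults, not code from this patch:

    #include "video_core/vulkan_common/vulkan_instance.h"
    #include "video_core/vulkan_common/vulkan_library.h"

    namespace Vulkan {

    bool IsVulkanUsable() {
        try {
            Common::DynamicLibrary library = OpenLibrary();
            vk::InstanceDispatch dld;
            // Defaults probe headless: no surface extension, no debug utils.
            const vk::Instance instance =
                CreateInstance(library, dld, VK_API_VERSION_1_1);
            return true;
        } catch (const vk::Exception&) {
            return false; // loader missing, driver too old, or extension absent
        }
    }

    } // namespace Vulkan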
diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h new file mode 100644 index 000000000..e5e3a7144 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_instance.h | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "common/dynamic_library.h" | ||
| 9 | #include "core/frontend/emu_window.h" | ||
| 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 11 | |||
| 12 | namespace Vulkan { | ||
| 13 | |||
| 14 | /** | ||
| 15 | * Create a Vulkan instance | ||
| 16 | * | ||
| 17 | * @param library Dynamic library to load the Vulkan instance from | ||
| 18 | * @param dld Dispatch table to load function pointers into | ||
| 19 | * @param required_version Required Vulkan version (for example, VK_API_VERSION_1_1) | ||
| 20 | * @param window_type Window system type used to pick the surface extension to enable | ||
| 21 | * @param enable_debug_utils Whether to enable the VK_EXT_debug_utils extension | ||
| 22 | * @param enable_layers Whether to enable Vulkan validation layers or not | ||
| 23 | * | ||
| 24 | * @return A new Vulkan instance | ||
| 25 | * @throw vk::Exception on failure | ||
| 26 | */ | ||
| 27 | [[nodiscard]] vk::Instance CreateInstance( | ||
| 28 | const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version, | ||
| 29 | Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless, | ||
| 30 | bool enable_debug_utils = false, bool enable_layers = false); | ||
| 31 | |||
| 32 | } // namespace Vulkan | ||
diff --git a/src/video_core/vulkan_common/vulkan_library.cpp b/src/video_core/vulkan_common/vulkan_library.cpp new file mode 100644 index 000000000..557871d81 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_library.cpp | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstdlib> | ||
| 6 | #include <string> | ||
| 7 | |||
| 8 | #include "common/dynamic_library.h" | ||
| 9 | #include "common/file_util.h" | ||
| 10 | #include "video_core/vulkan_common/vulkan_library.h" | ||
| 11 | |||
| 12 | namespace Vulkan { | ||
| 13 | |||
| 14 | Common::DynamicLibrary OpenLibrary() { | ||
| 15 | Common::DynamicLibrary library; | ||
| 16 | #ifdef __APPLE__ | ||
| 17 | // Check if a path to a specific Vulkan library has been specified. | ||
| 18 | char* const libvulkan_env = std::getenv("LIBVULKAN_PATH"); | ||
| 19 | if (!libvulkan_env || !library.Open(libvulkan_env)) { | ||
| 20 | // Use the libvulkan.dylib from the application bundle. | ||
| 21 | const std::string filename = | ||
| 22 | Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; | ||
| 23 | void(library.Open(filename.c_str())); | ||
| 24 | } | ||
| 25 | #else | ||
| 26 | std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1); | ||
| 27 | if (!library.Open(filename.c_str())) { | ||
| 28 | // Android devices may not have libvulkan.so.1, only libvulkan.so. | ||
| 29 | filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); | ||
| 30 | void(library.Open(filename.c_str())); | ||
| 31 | } | ||
| 32 | #endif | ||
| 33 | return library; | ||
| 34 | } | ||
| 35 | |||
| 36 | } // namespace Vulkan | ||
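OpenLibrary() deliberately discards the result of the fallback Open() calls (the `void(...)` casts) and returns the DynamicLibrary either way, so the failure check belongs to the caller. A sketch of that check — `IsOpen()` is an assumed accessor on Common::DynamicLibrary, not something this patch adds:

```cpp
#include "common/dynamic_library.h"
#include "common/logging/log.h"
#include "video_core/vulkan_common/vulkan_library.h"

bool HasUsableVulkanLoader() {
    const Common::DynamicLibrary library = Vulkan::OpenLibrary();
    if (!library.IsOpen()) { // assumed API; verify against common/dynamic_library.h
        LOG_ERROR(Render_Vulkan, "Could not open a Vulkan loader library");
        return false;
    }
    return true;
}
```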
diff --git a/src/video_core/vulkan_common/vulkan_library.h b/src/video_core/vulkan_common/vulkan_library.h new file mode 100644 index 000000000..8b28b0e17 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_library.h | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/dynamic_library.h" | ||
| 8 | |||
| 9 | namespace Vulkan { | ||
| 10 | |||
| 11 | Common::DynamicLibrary OpenLibrary(); | ||
| 12 | |||
| 13 | } // namespace Vulkan | ||
diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp new file mode 100644 index 000000000..3c3238f96 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_surface.cpp | |||
| @@ -0,0 +1,81 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "core/frontend/emu_window.h" | ||
| 7 | #include "video_core/vulkan_common/vulkan_surface.h" | ||
| 8 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 9 | |||
| 10 | // Include these late to avoid polluting previous headers | ||
| 11 | #ifdef _WIN32 | ||
| 12 | #include <windows.h> | ||
| 13 | // ensure include order | ||
| 14 | #include <vulkan/vulkan_win32.h> | ||
| 15 | #endif | ||
| 16 | |||
| 17 | #if !defined(_WIN32) && !defined(__APPLE__) | ||
| 18 | #include <X11/Xlib.h> | ||
| 19 | #include <vulkan/vulkan_wayland.h> | ||
| 20 | #include <vulkan/vulkan_xlib.h> | ||
| 21 | #endif | ||
| 22 | |||
| 23 | namespace Vulkan { | ||
| 24 | |||
| 25 | vk::SurfaceKHR CreateSurface(const vk::Instance& instance, | ||
| 26 | const Core::Frontend::EmuWindow& emu_window) { | ||
| 27 | [[maybe_unused]] const vk::InstanceDispatch& dld = instance.Dispatch(); | ||
| 28 | [[maybe_unused]] const auto& window_info = emu_window.GetWindowInfo(); | ||
| 29 | VkSurfaceKHR unsafe_surface = nullptr; | ||
| 30 | |||
| 31 | #ifdef _WIN32 | ||
| 32 | if (window_info.type == Core::Frontend::WindowSystemType::Windows) { | ||
| 33 | const HWND hWnd = static_cast<HWND>(window_info.render_surface); | ||
| 34 | const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, | ||
| 35 | nullptr, 0, nullptr, hWnd}; | ||
| 36 | const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>( | ||
| 37 | dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR")); | ||
| 38 | if (!vkCreateWin32SurfaceKHR || | ||
| 39 | vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { | ||
| 40 | LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface"); | ||
| 41 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||
| 42 | } | ||
| 43 | } | ||
| 44 | #endif | ||
| 45 | #if !defined(_WIN32) && !defined(__APPLE__) | ||
| 46 | if (window_info.type == Core::Frontend::WindowSystemType::X11) { | ||
| 47 | const VkXlibSurfaceCreateInfoKHR xlib_ci{ | ||
| 48 | VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, | ||
| 49 | static_cast<Display*>(window_info.display_connection), | ||
| 50 | reinterpret_cast<Window>(window_info.render_surface)}; | ||
| 51 | const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>( | ||
| 52 | dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR")); | ||
| 53 | if (!vkCreateXlibSurfaceKHR || | ||
| 54 | vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { | ||
| 55 | LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); | ||
| 56 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | if (window_info.type == Core::Frontend::WindowSystemType::Wayland) { | ||
| 60 | const VkWaylandSurfaceCreateInfoKHR wayland_ci{ | ||
| 61 | VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0, | ||
| 62 | static_cast<wl_display*>(window_info.display_connection), | ||
| 63 | static_cast<wl_surface*>(window_info.render_surface)}; | ||
| 64 | const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>( | ||
| 65 | dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR")); | ||
| 66 | if (!vkCreateWaylandSurfaceKHR || | ||
| 67 | vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) != | ||
| 68 | VK_SUCCESS) { | ||
| 69 | LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); | ||
| 70 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | #endif | ||
| 74 | if (!unsafe_surface) { | ||
| 75 | LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); | ||
| 76 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||
| 77 | } | ||
| 78 | return vk::SurfaceKHR(unsafe_surface, *instance, dld); | ||
| 79 | } | ||
| 80 | |||
| 81 | } // namespace Vulkan | ||
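CreateSurface resolves each platform entry point (vkCreateWin32SurfaceKHR and friends) through vkGetInstanceProcAddr at runtime, so this file links without any platform surface libraries; when no branch matches, the trailing null check turns a headless window type into an explicit failure. Putting the new files together, the intended call sequence looks roughly like this sketch (illustrative only — the real wiring lives in the renderer backend):

```cpp
#include "core/frontend/emu_window.h"
#include "video_core/vulkan_common/vulkan_instance.h"
#include "video_core/vulkan_common/vulkan_library.h"
#include "video_core/vulkan_common/vulkan_surface.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"

bool CanPresentTo(const Core::Frontend::EmuWindow& emu_window) try {
    Vulkan::vk::InstanceDispatch dld;
    const Common::DynamicLibrary library = Vulkan::OpenLibrary();
    const Vulkan::vk::Instance instance = Vulkan::CreateInstance(
        library, dld, VK_API_VERSION_1_1, emu_window.GetWindowInfo().type);
    // surface owns the VkSurfaceKHR; it is released again when this probe returns.
    const Vulkan::vk::SurfaceKHR surface = Vulkan::CreateSurface(instance, emu_window);
    return true;
} catch (const Vulkan::vk::Exception&) {
    // OpenLibrary never throws; CreateInstance and CreateSurface both do.
    return false;
}
```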
diff --git a/src/video_core/vulkan_common/vulkan_surface.h b/src/video_core/vulkan_common/vulkan_surface.h new file mode 100644 index 000000000..05a169e32 --- /dev/null +++ b/src/video_core/vulkan_common/vulkan_surface.h | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 8 | |||
| 9 | namespace Core::Frontend { | ||
| 10 | class EmuWindow; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace Vulkan { | ||
| 14 | |||
| 15 | [[nodiscard]] vk::SurfaceKHR CreateSurface(const vk::Instance& instance, | ||
| 16 | const Core::Frontend::EmuWindow& emu_window); | ||
| 17 | |||
| 18 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 1eced809e..5e15ad607 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp | |||
| @@ -13,7 +13,7 @@ | |||
| 13 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "common/logging/log.h" | 14 | #include "common/logging/log.h" |
| 15 | 15 | ||
| 16 | #include "video_core/renderer_vulkan/wrapper.h" | 16 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 17 | 17 | ||
| 18 | namespace Vulkan::vk { | 18 | namespace Vulkan::vk { |
| 19 | 19 | ||
| @@ -81,6 +81,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 81 | X(vkCmdBeginQuery); | 81 | X(vkCmdBeginQuery); |
| 82 | X(vkCmdBeginRenderPass); | 82 | X(vkCmdBeginRenderPass); |
| 83 | X(vkCmdBeginTransformFeedbackEXT); | 83 | X(vkCmdBeginTransformFeedbackEXT); |
| 84 | X(vkCmdBeginDebugUtilsLabelEXT); | ||
| 84 | X(vkCmdBindDescriptorSets); | 85 | X(vkCmdBindDescriptorSets); |
| 85 | X(vkCmdBindIndexBuffer); | 86 | X(vkCmdBindIndexBuffer); |
| 86 | X(vkCmdBindPipeline); | 87 | X(vkCmdBindPipeline); |
| @@ -98,6 +99,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 98 | X(vkCmdEndQuery); | 99 | X(vkCmdEndQuery); |
| 99 | X(vkCmdEndRenderPass); | 100 | X(vkCmdEndRenderPass); |
| 100 | X(vkCmdEndTransformFeedbackEXT); | 101 | X(vkCmdEndTransformFeedbackEXT); |
| 102 | X(vkCmdEndDebugUtilsLabelEXT); | ||
| 101 | X(vkCmdFillBuffer); | 103 | X(vkCmdFillBuffer); |
| 102 | X(vkCmdPipelineBarrier); | 104 | X(vkCmdPipelineBarrier); |
| 103 | X(vkCmdPushConstants); | 105 | X(vkCmdPushConstants); |
| @@ -121,6 +123,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 121 | X(vkCmdSetPrimitiveTopologyEXT); | 123 | X(vkCmdSetPrimitiveTopologyEXT); |
| 122 | X(vkCmdSetStencilOpEXT); | 124 | X(vkCmdSetStencilOpEXT); |
| 123 | X(vkCmdSetStencilTestEnableEXT); | 125 | X(vkCmdSetStencilTestEnableEXT); |
| 126 | X(vkCmdResolveImage); | ||
| 124 | X(vkCreateBuffer); | 127 | X(vkCreateBuffer); |
| 125 | X(vkCreateBufferView); | 128 | X(vkCreateBufferView); |
| 126 | X(vkCreateCommandPool); | 129 | X(vkCreateCommandPool); |
| @@ -176,6 +179,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 176 | X(vkQueueSubmit); | 179 | X(vkQueueSubmit); |
| 177 | X(vkResetFences); | 180 | X(vkResetFences); |
| 178 | X(vkResetQueryPoolEXT); | 181 | X(vkResetQueryPoolEXT); |
| 182 | X(vkSetDebugUtilsObjectNameEXT); | ||
| 183 | X(vkSetDebugUtilsObjectTagEXT); | ||
| 179 | X(vkUnmapMemory); | 184 | X(vkUnmapMemory); |
| 180 | X(vkUpdateDescriptorSetWithTemplateKHR); | 185 | X(vkUpdateDescriptorSetWithTemplateKHR); |
| 181 | X(vkUpdateDescriptorSets); | 186 | X(vkUpdateDescriptorSets); |
| @@ -184,6 +189,19 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 184 | #undef X | 189 | #undef X |
| 185 | } | 190 | } |
| 186 | 191 | ||
| 192 | template <typename T> | ||
| 193 | void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjectType type, | ||
| 194 | const char* name) { | ||
| 195 | const VkDebugUtilsObjectNameInfoEXT name_info{ | ||
| 196 | .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, | ||
| 197 | .pNext = nullptr, | ||
| 198 | .objectType = type, | ||
| 199 | .objectHandle = reinterpret_cast<u64>(handle), | ||
| 200 | .pObjectName = name, | ||
| 201 | }; | ||
| 202 | Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info)); | ||
| 203 | } | ||
| 204 | |||
| 187 | } // Anonymous namespace | 205 | } // Anonymous namespace |
| 188 | 206 | ||
| 189 | bool Load(InstanceDispatch& dld) noexcept { | 207 | bool Load(InstanceDispatch& dld) noexcept { |
| @@ -417,7 +435,7 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe | |||
| 417 | } | 435 | } |
| 418 | 436 | ||
| 419 | Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char*> extensions, | 437 | Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char*> extensions, |
| 420 | InstanceDispatch& dispatch) noexcept { | 438 | InstanceDispatch& dispatch) { |
| 421 | const VkApplicationInfo application_info{ | 439 | const VkApplicationInfo application_info{ |
| 422 | .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, | 440 | .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, |
| 423 | .pNext = nullptr, | 441 | .pNext = nullptr, |
| @@ -437,66 +455,68 @@ Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char | |||
| 437 | .enabledExtensionCount = extensions.size(), | 455 | .enabledExtensionCount = extensions.size(), |
| 438 | .ppEnabledExtensionNames = extensions.data(), | 456 | .ppEnabledExtensionNames = extensions.data(), |
| 439 | }; | 457 | }; |
| 440 | |||
| 441 | VkInstance instance; | 458 | VkInstance instance; |
| 442 | if (dispatch.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { | 459 | Check(dispatch.vkCreateInstance(&ci, nullptr, &instance)); |
| 443 | // Failed to create the instance. | ||
| 444 | return {}; | ||
| 445 | } | ||
| 446 | if (!Proc(dispatch.vkDestroyInstance, dispatch, "vkDestroyInstance", instance)) { | 460 | if (!Proc(dispatch.vkDestroyInstance, dispatch, "vkDestroyInstance", instance)) { |
| 447 | // We successfully created an instance but the destroy function couldn't be loaded. | 461 | // We successfully created an instance but the destroy function couldn't be loaded. |
| 448 | // This is a good moment to panic. | 462 | // This is a good moment to panic. |
| 449 | return {}; | 463 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); |
| 450 | } | 464 | } |
| 451 | |||
| 452 | return Instance(instance, dispatch); | 465 | return Instance(instance, dispatch); |
| 453 | } | 466 | } |
| 454 | 467 | ||
| 455 | std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices() { | 468 | std::vector<VkPhysicalDevice> Instance::EnumeratePhysicalDevices() const { |
| 456 | u32 num; | 469 | u32 num; |
| 457 | if (dld->vkEnumeratePhysicalDevices(handle, &num, nullptr) != VK_SUCCESS) { | 470 | Check(dld->vkEnumeratePhysicalDevices(handle, &num, nullptr)); |
| 458 | return std::nullopt; | ||
| 459 | } | ||
| 460 | std::vector<VkPhysicalDevice> physical_devices(num); | 471 | std::vector<VkPhysicalDevice> physical_devices(num); |
| 461 | if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) { | 472 | Check(dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data())); |
| 462 | return std::nullopt; | ||
| 463 | } | ||
| 464 | SortPhysicalDevices(physical_devices, *dld); | 473 | SortPhysicalDevices(physical_devices, *dld); |
| 465 | return std::make_optional(std::move(physical_devices)); | 474 | return physical_devices; |
| 466 | } | 475 | } |
| 467 | 476 | ||
| 468 | DebugCallback Instance::TryCreateDebugCallback( | 477 | DebugUtilsMessenger Instance::CreateDebugUtilsMessenger( |
| 469 | PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept { | 478 | const VkDebugUtilsMessengerCreateInfoEXT& create_info) const { |
| 470 | const VkDebugUtilsMessengerCreateInfoEXT ci{ | 479 | VkDebugUtilsMessengerEXT object; |
| 471 | .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, | 480 | Check(dld->vkCreateDebugUtilsMessengerEXT(handle, &create_info, nullptr, &object)); |
| 472 | .pNext = nullptr, | 481 | return DebugUtilsMessenger(object, handle, *dld); |
| 473 | .flags = 0, | ||
| 474 | .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | | ||
| 475 | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | | ||
| 476 | VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | | ||
| 477 | VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, | ||
| 478 | .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | | ||
| 479 | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | | ||
| 480 | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, | ||
| 481 | .pfnUserCallback = callback, | ||
| 482 | .pUserData = nullptr, | ||
| 483 | }; | ||
| 484 | |||
| 485 | VkDebugUtilsMessengerEXT messenger; | ||
| 486 | if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) { | ||
| 487 | return {}; | ||
| 488 | } | ||
| 489 | return DebugCallback(messenger, handle, *dld); | ||
| 490 | } | 482 | } |
| 491 | 483 | ||
| 492 | void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { | 484 | void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { |
| 493 | Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); | 485 | Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); |
| 494 | } | 486 | } |
| 495 | 487 | ||
| 488 | void Buffer::SetObjectNameEXT(const char* name) const { | ||
| 489 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name); | ||
| 490 | } | ||
| 491 | |||
| 492 | void BufferView::SetObjectNameEXT(const char* name) const { | ||
| 493 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name); | ||
| 494 | } | ||
| 495 | |||
| 496 | void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { | 496 | void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { |
| 497 | Check(dld->vkBindImageMemory(owner, handle, memory, offset)); | 497 | Check(dld->vkBindImageMemory(owner, handle, memory, offset)); |
| 498 | } | 498 | } |
| 499 | 499 | ||
| 500 | void Image::SetObjectNameEXT(const char* name) const { | ||
| 501 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name); | ||
| 502 | } | ||
| 503 | |||
| 504 | void ImageView::SetObjectNameEXT(const char* name) const { | ||
| 505 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name); | ||
| 506 | } | ||
| 507 | |||
| 508 | void DeviceMemory::SetObjectNameEXT(const char* name) const { | ||
| 509 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name); | ||
| 510 | } | ||
| 511 | |||
| 512 | void Fence::SetObjectNameEXT(const char* name) const { | ||
| 513 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FENCE, name); | ||
| 514 | } | ||
| 515 | |||
| 516 | void Framebuffer::SetObjectNameEXT(const char* name) const { | ||
| 517 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FRAMEBUFFER, name); | ||
| 518 | } | ||
| 519 | |||
| 500 | DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const { | 520 | DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const { |
| 501 | const std::size_t num = ai.descriptorSetCount; | 521 | const std::size_t num = ai.descriptorSetCount; |
| 502 | std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num); | 522 | std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num); |
| @@ -510,6 +530,10 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c | |||
| 510 | } | 530 | } |
| 511 | } | 531 | } |
| 512 | 532 | ||
| 533 | void DescriptorPool::SetObjectNameEXT(const char* name) const { | ||
| 534 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DESCRIPTOR_POOL, name); | ||
| 535 | } | ||
| 536 | |||
| 513 | CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { | 537 | CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { |
| 514 | const VkCommandBufferAllocateInfo ai{ | 538 | const VkCommandBufferAllocateInfo ai{ |
| 515 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, | 539 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, |
| @@ -530,6 +554,10 @@ CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLev | |||
| 530 | } | 554 | } |
| 531 | } | 555 | } |
| 532 | 556 | ||
| 557 | void CommandPool::SetObjectNameEXT(const char* name) const { | ||
| 558 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_COMMAND_POOL, name); | ||
| 559 | } | ||
| 560 | |||
| 533 | std::vector<VkImage> SwapchainKHR::GetImages() const { | 561 | std::vector<VkImage> SwapchainKHR::GetImages() const { |
| 534 | u32 num; | 562 | u32 num; |
| 535 | Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); | 563 | Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); |
| @@ -538,9 +566,21 @@ std::vector<VkImage> SwapchainKHR::GetImages() const { | |||
| 538 | return images; | 566 | return images; |
| 539 | } | 567 | } |
| 540 | 568 | ||
| 569 | void Event::SetObjectNameEXT(const char* name) const { | ||
| 570 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_EVENT, name); | ||
| 571 | } | ||
| 572 | |||
| 573 | void ShaderModule::SetObjectNameEXT(const char* name) const { | ||
| 574 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name); | ||
| 575 | } | ||
| 576 | |||
| 577 | void Semaphore::SetObjectNameEXT(const char* name) const { | ||
| 578 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name); | ||
| 579 | } | ||
| 580 | |||
| 541 | Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, | 581 | Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, |
| 542 | Span<const char*> enabled_extensions, const void* next, | 582 | Span<const char*> enabled_extensions, const void* next, |
| 543 | DeviceDispatch& dispatch) noexcept { | 583 | DeviceDispatch& dispatch) { |
| 544 | const VkDeviceCreateInfo ci{ | 584 | const VkDeviceCreateInfo ci{ |
| 545 | .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, | 585 | .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, |
| 546 | .pNext = next, | 586 | .pNext = next, |
| @@ -553,11 +593,8 @@ Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreate | |||
| 553 | .ppEnabledExtensionNames = enabled_extensions.data(), | 593 | .ppEnabledExtensionNames = enabled_extensions.data(), |
| 554 | .pEnabledFeatures = nullptr, | 594 | .pEnabledFeatures = nullptr, |
| 555 | }; | 595 | }; |
| 556 | |||
| 557 | VkDevice device; | 596 | VkDevice device; |
| 558 | if (dispatch.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { | 597 | Check(dispatch.vkCreateDevice(physical_device, &ci, nullptr, &device)); |
| 559 | return {}; | ||
| 560 | } | ||
| 561 | Load(device, dispatch); | 598 | Load(device, dispatch); |
| 562 | return Device(device, dispatch); | 599 | return Device(device, dispatch); |
| 563 | } | 600 | } |
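This is where the wrapper's error model flips: Instance::Create, Device::Create, and EnumeratePhysicalDevices lose their empty-handle/std::optional failure returns and route every VkResult through Check(), which throws vk::Exception. A before/after sketch of a call site (the "before" half is reconstructed from the removed lines; `what()` is assumed to exist on vk::Exception in the usual std::exception fashion):

```cpp
#include <vector>
#include "common/logging/log.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"

std::size_t CountPhysicalDevices(const Vulkan::vk::Instance& instance) {
    // Before: std::optional return, easy to forget the check.
    //   const auto devices = instance.EnumeratePhysicalDevices();
    //   if (!devices) return 0;
    //   return devices->size();

    // After: errors carry the VkResult and unwind to one handler.
    try {
        return instance.EnumeratePhysicalDevices().size();
    } catch (const Vulkan::vk::Exception& e) {
        LOG_ERROR(Render_Vulkan, "vkEnumeratePhysicalDevices failed: {}", e.what());
        return 0;
    }
}
```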
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 76f790eab..912cab46c 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <limits> | 9 | #include <limits> |
| 10 | #include <memory> | 10 | #include <memory> |
| 11 | #include <optional> | 11 | #include <optional> |
| 12 | #include <span> | ||
| 12 | #include <type_traits> | 13 | #include <type_traits> |
| 13 | #include <utility> | 14 | #include <utility> |
| 14 | #include <vector> | 15 | #include <vector> |
| @@ -18,6 +19,10 @@ | |||
| 18 | 19 | ||
| 19 | #include "common/common_types.h" | 20 | #include "common/common_types.h" |
| 20 | 21 | ||
| 22 | #ifdef _MSC_VER | ||
| 23 | #pragma warning(disable : 26812) // Disable prefer enum class over enum | ||
| 24 | #endif | ||
| 25 | |||
| 21 | namespace Vulkan::vk { | 26 | namespace Vulkan::vk { |
| 22 | 27 | ||
| 23 | /** | 28 | /** |
| @@ -41,6 +46,9 @@ public: | |||
| 41 | /// Construct an empty span. | 46 | /// Construct an empty span. |
| 42 | constexpr Span() noexcept = default; | 47 | constexpr Span() noexcept = default; |
| 43 | 48 | ||
| 49 | /// Construct an empty span from a null pointer. | ||
| 50 | constexpr Span(std::nullptr_t) noexcept {} | ||
| 51 | |||
| 44 | /// Construct a span from a single element. | 52 | /// Construct a span from a single element. |
| 45 | constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {} | 53 | constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {} |
| 46 | 54 | ||
| @@ -177,6 +185,7 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 177 | PFN_vkCmdBeginQuery vkCmdBeginQuery; | 185 | PFN_vkCmdBeginQuery vkCmdBeginQuery; |
| 178 | PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; | 186 | PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; |
| 179 | PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; | 187 | PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; |
| 188 | PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT; | ||
| 180 | PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; | 189 | PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; |
| 181 | PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; | 190 | PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; |
| 182 | PFN_vkCmdBindPipeline vkCmdBindPipeline; | 191 | PFN_vkCmdBindPipeline vkCmdBindPipeline; |
| @@ -194,6 +203,7 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 194 | PFN_vkCmdEndQuery vkCmdEndQuery; | 203 | PFN_vkCmdEndQuery vkCmdEndQuery; |
| 195 | PFN_vkCmdEndRenderPass vkCmdEndRenderPass; | 204 | PFN_vkCmdEndRenderPass vkCmdEndRenderPass; |
| 196 | PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT; | 205 | PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT; |
| 206 | PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT; | ||
| 197 | PFN_vkCmdFillBuffer vkCmdFillBuffer; | 207 | PFN_vkCmdFillBuffer vkCmdFillBuffer; |
| 198 | PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; | 208 | PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; |
| 199 | PFN_vkCmdPushConstants vkCmdPushConstants; | 209 | PFN_vkCmdPushConstants vkCmdPushConstants; |
| @@ -217,6 +227,7 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 217 | PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; | 227 | PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; |
| 218 | PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; | 228 | PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; |
| 219 | PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; | 229 | PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; |
| 230 | PFN_vkCmdResolveImage vkCmdResolveImage; | ||
| 220 | PFN_vkCreateBuffer vkCreateBuffer; | 231 | PFN_vkCreateBuffer vkCreateBuffer; |
| 221 | PFN_vkCreateBufferView vkCreateBufferView; | 232 | PFN_vkCreateBufferView vkCreateBufferView; |
| 222 | PFN_vkCreateCommandPool vkCreateCommandPool; | 233 | PFN_vkCreateCommandPool vkCreateCommandPool; |
| @@ -272,6 +283,8 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 272 | PFN_vkQueueSubmit vkQueueSubmit; | 283 | PFN_vkQueueSubmit vkQueueSubmit; |
| 273 | PFN_vkResetFences vkResetFences; | 284 | PFN_vkResetFences vkResetFences; |
| 274 | PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; | 285 | PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; |
| 286 | PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT; | ||
| 287 | PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT; | ||
| 275 | PFN_vkUnmapMemory vkUnmapMemory; | 288 | PFN_vkUnmapMemory vkUnmapMemory; |
| 276 | PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; | 289 | PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; |
| 277 | PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; | 290 | PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; |
| @@ -542,18 +555,14 @@ private: | |||
| 542 | const DeviceDispatch* dld = nullptr; | 555 | const DeviceDispatch* dld = nullptr; |
| 543 | }; | 556 | }; |
| 544 | 557 | ||
| 545 | using BufferView = Handle<VkBufferView, VkDevice, DeviceDispatch>; | 558 | using DebugUtilsMessenger = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>; |
| 546 | using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>; | ||
| 547 | using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>; | 559 | using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>; |
| 548 | using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>; | 560 | using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>; |
| 549 | using Framebuffer = Handle<VkFramebuffer, VkDevice, DeviceDispatch>; | ||
| 550 | using ImageView = Handle<VkImageView, VkDevice, DeviceDispatch>; | ||
| 551 | using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>; | 561 | using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>; |
| 552 | using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>; | 562 | using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>; |
| 553 | using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>; | 563 | using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>; |
| 554 | using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>; | 564 | using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>; |
| 555 | using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>; | 565 | using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>; |
| 556 | using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>; | ||
| 557 | using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>; | 566 | using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>; |
| 558 | 567 | ||
| 559 | using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>; | 568 | using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>; |
| @@ -564,16 +573,25 @@ class Instance : public Handle<VkInstance, NoOwner, InstanceDispatch> { | |||
| 564 | using Handle<VkInstance, NoOwner, InstanceDispatch>::Handle; | 573 | using Handle<VkInstance, NoOwner, InstanceDispatch>::Handle; |
| 565 | 574 | ||
| 566 | public: | 575 | public: |
| 567 | /// Creates a Vulkan instance. Use "operator bool" for error handling. | 576 | /// Creates a Vulkan instance. |
| 577 | /// @throw Exception on initialization error. | ||
| 568 | static Instance Create(u32 version, Span<const char*> layers, Span<const char*> extensions, | 578 | static Instance Create(u32 version, Span<const char*> layers, Span<const char*> extensions, |
| 569 | InstanceDispatch& dispatch) noexcept; | 579 | InstanceDispatch& dispatch); |
| 570 | 580 | ||
| 571 | /// Enumerates physical devices. | 581 | /// Enumerates physical devices. |
| 572 | /// @return Physical devices and an empty handle on failure. | 582 | /// @return Physical devices and an empty handle on failure. |
| 573 | std::optional<std::vector<VkPhysicalDevice>> EnumeratePhysicalDevices(); | 583 | /// @throw Exception on Vulkan error. |
| 584 | std::vector<VkPhysicalDevice> EnumeratePhysicalDevices() const; | ||
| 585 | |||
| 586 | /// Creates a debug callback messenger. | ||
| 587 | /// @throw Exception on creation failure. | ||
| 588 | DebugUtilsMessenger CreateDebugUtilsMessenger( | ||
| 589 | const VkDebugUtilsMessengerCreateInfoEXT& create_info) const; | ||
| 574 | 590 | ||
| 575 | /// Tries to create a debug callback messenger. Returns an empty handle on failure. | 591 | /// Returns the dispatch table. |
| 576 | DebugCallback TryCreateDebugCallback(PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept; | 592 | const InstanceDispatch& Dispatch() const noexcept { |
| 593 | return *dld; | ||
| 594 | } | ||
| 577 | }; | 595 | }; |
| 578 | 596 | ||
| 579 | class Queue { | 597 | class Queue { |
| @@ -605,6 +623,17 @@ class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> { | |||
| 605 | public: | 623 | public: |
| 606 | /// Attaches a memory allocation. | 624 | /// Attaches a memory allocation. |
| 607 | void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; | 625 | void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; |
| 626 | |||
| 627 | /// Set object name. | ||
| 628 | void SetObjectNameEXT(const char* name) const; | ||
| 629 | }; | ||
| 630 | |||
| 631 | class BufferView : public Handle<VkBufferView, VkDevice, DeviceDispatch> { | ||
| 632 | using Handle<VkBufferView, VkDevice, DeviceDispatch>::Handle; | ||
| 633 | |||
| 634 | public: | ||
| 635 | /// Set object name. | ||
| 636 | void SetObjectNameEXT(const char* name) const; | ||
| 608 | }; | 637 | }; |
| 609 | 638 | ||
| 610 | class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { | 639 | class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { |
| @@ -613,12 +642,26 @@ class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { | |||
| 613 | public: | 642 | public: |
| 614 | /// Attaches a memory allocation. | 643 | /// Attaches a memory allocation. |
| 615 | void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; | 644 | void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; |
| 645 | |||
| 646 | /// Set object name. | ||
| 647 | void SetObjectNameEXT(const char* name) const; | ||
| 648 | }; | ||
| 649 | |||
| 650 | class ImageView : public Handle<VkImageView, VkDevice, DeviceDispatch> { | ||
| 651 | using Handle<VkImageView, VkDevice, DeviceDispatch>::Handle; | ||
| 652 | |||
| 653 | public: | ||
| 654 | /// Set object name. | ||
| 655 | void SetObjectNameEXT(const char* name) const; | ||
| 616 | }; | 656 | }; |
| 617 | 657 | ||
| 618 | class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> { | 658 | class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> { |
| 619 | using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; | 659 | using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; |
| 620 | 660 | ||
| 621 | public: | 661 | public: |
| 662 | /// Set object name. | ||
| 663 | void SetObjectNameEXT(const char* name) const; | ||
| 664 | |||
| 622 | u8* Map(VkDeviceSize offset, VkDeviceSize size) const { | 665 | u8* Map(VkDeviceSize offset, VkDeviceSize size) const { |
| 623 | void* data; | 666 | void* data; |
| 624 | Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); | 667 | Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); |
| @@ -634,6 +677,9 @@ class Fence : public Handle<VkFence, VkDevice, DeviceDispatch> { | |||
| 634 | using Handle<VkFence, VkDevice, DeviceDispatch>::Handle; | 677 | using Handle<VkFence, VkDevice, DeviceDispatch>::Handle; |
| 635 | 678 | ||
| 636 | public: | 679 | public: |
| 680 | /// Set object name. | ||
| 681 | void SetObjectNameEXT(const char* name) const; | ||
| 682 | |||
| 637 | VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept { | 683 | VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept { |
| 638 | return dld->vkWaitForFences(owner, 1, &handle, true, timeout); | 684 | return dld->vkWaitForFences(owner, 1, &handle, true, timeout); |
| 639 | } | 685 | } |
| @@ -647,11 +693,22 @@ public: | |||
| 647 | } | 693 | } |
| 648 | }; | 694 | }; |
| 649 | 695 | ||
| 696 | class Framebuffer : public Handle<VkFramebuffer, VkDevice, DeviceDispatch> { | ||
| 697 | using Handle<VkFramebuffer, VkDevice, DeviceDispatch>::Handle; | ||
| 698 | |||
| 699 | public: | ||
| 700 | /// Set object name. | ||
| 701 | void SetObjectNameEXT(const char* name) const; | ||
| 702 | }; | ||
| 703 | |||
| 650 | class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> { | 704 | class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> { |
| 651 | using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle; | 705 | using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle; |
| 652 | 706 | ||
| 653 | public: | 707 | public: |
| 654 | DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; | 708 | DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; |
| 709 | |||
| 710 | /// Set object name. | ||
| 711 | void SetObjectNameEXT(const char* name) const; | ||
| 655 | }; | 712 | }; |
| 656 | 713 | ||
| 657 | class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> { | 714 | class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> { |
| @@ -660,6 +717,9 @@ class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> { | |||
| 660 | public: | 717 | public: |
| 661 | CommandBuffers Allocate(std::size_t num_buffers, | 718 | CommandBuffers Allocate(std::size_t num_buffers, |
| 662 | VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; | 719 | VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; |
| 720 | |||
| 721 | /// Set object name. | ||
| 722 | void SetObjectNameEXT(const char* name) const; | ||
| 663 | }; | 723 | }; |
| 664 | 724 | ||
| 665 | class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> { | 725 | class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> { |
| @@ -673,15 +733,29 @@ class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> { | |||
| 673 | using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle; | 733 | using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle; |
| 674 | 734 | ||
| 675 | public: | 735 | public: |
| 736 | /// Set object name. | ||
| 737 | void SetObjectNameEXT(const char* name) const; | ||
| 738 | |||
| 676 | VkResult GetStatus() const noexcept { | 739 | VkResult GetStatus() const noexcept { |
| 677 | return dld->vkGetEventStatus(owner, handle); | 740 | return dld->vkGetEventStatus(owner, handle); |
| 678 | } | 741 | } |
| 679 | }; | 742 | }; |
| 680 | 743 | ||
| 744 | class ShaderModule : public Handle<VkShaderModule, VkDevice, DeviceDispatch> { | ||
| 745 | using Handle<VkShaderModule, VkDevice, DeviceDispatch>::Handle; | ||
| 746 | |||
| 747 | public: | ||
| 748 | /// Set object name. | ||
| 749 | void SetObjectNameEXT(const char* name) const; | ||
| 750 | }; | ||
| 751 | |||
| 681 | class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { | 752 | class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { |
| 682 | using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; | 753 | using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; |
| 683 | 754 | ||
| 684 | public: | 755 | public: |
| 756 | /// Set object name. | ||
| 757 | void SetObjectNameEXT(const char* name) const; | ||
| 758 | |||
| 685 | [[nodiscard]] u64 GetCounter() const { | 759 | [[nodiscard]] u64 GetCounter() const { |
| 686 | u64 value; | 760 | u64 value; |
| 687 | Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); | 761 | Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); |
| @@ -722,7 +796,7 @@ class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { | |||
| 722 | public: | 796 | public: |
| 723 | static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, | 797 | static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, |
| 724 | Span<const char*> enabled_extensions, const void* next, | 798 | Span<const char*> enabled_extensions, const void* next, |
| 725 | DeviceDispatch& dispatch) noexcept; | 799 | DeviceDispatch& dispatch); |
| 726 | 800 | ||
| 727 | Queue GetQueue(u32 family_index) const noexcept; | 801 | Queue GetQueue(u32 family_index) const noexcept; |
| 728 | 802 | ||
| @@ -932,6 +1006,12 @@ public: | |||
| 932 | regions.data(), filter); | 1006 | regions.data(), filter); |
| 933 | } | 1007 | } |
| 934 | 1008 | ||
| 1009 | void ResolveImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, | ||
| 1010 | VkImageLayout dst_layout, Span<VkImageResolve> regions) const noexcept { | ||
| 1011 | dld->vkCmdResolveImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), | ||
| 1012 | regions.data()); | ||
| 1013 | } | ||
| 1014 | |||
| 935 | void Dispatch(u32 x, u32 y, u32 z) const noexcept { | 1015 | void Dispatch(u32 x, u32 y, u32 z) const noexcept { |
| 936 | dld->vkCmdDispatch(handle, x, y, z); | 1016 | dld->vkCmdDispatch(handle, x, y, z); |
| 937 | } | 1017 | } |
| @@ -946,6 +1026,23 @@ public: | |||
| 946 | image_barriers.size(), image_barriers.data()); | 1026 | image_barriers.size(), image_barriers.data()); |
| 947 | } | 1027 | } |
| 948 | 1028 | ||
| 1029 | void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, | ||
| 1030 | VkDependencyFlags dependency_flags = 0) const noexcept { | ||
| 1031 | PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, {}); | ||
| 1032 | } | ||
| 1033 | |||
| 1034 | void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, | ||
| 1035 | VkDependencyFlags dependency_flags, | ||
| 1036 | const VkBufferMemoryBarrier& buffer_barrier) const noexcept { | ||
| 1037 | PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {}); | ||
| 1038 | } | ||
| 1039 | |||
| 1040 | void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, | ||
| 1041 | VkDependencyFlags dependency_flags, | ||
| 1042 | const VkImageMemoryBarrier& image_barrier) const noexcept { | ||
| 1043 | PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, image_barrier); | ||
| 1044 | } | ||
| 1045 | |||
| 949 | void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout, | 1046 | void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout, |
| 950 | Span<VkBufferImageCopy> regions) const noexcept { | 1047 | Span<VkBufferImageCopy> regions) const noexcept { |
| 951 | dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), | 1048 | dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), |
| @@ -979,6 +1076,13 @@ public: | |||
| 979 | dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); | 1076 | dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); |
| 980 | } | 1077 | } |
| 981 | 1078 | ||
| 1079 | template <typename T> | ||
| 1080 | void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags, | ||
| 1081 | const T& data) const noexcept { | ||
| 1082 | static_assert(std::is_trivially_copyable_v<T>, "<data> is not trivially copyable"); | ||
| 1083 | dld->vkCmdPushConstants(handle, layout, flags, 0, static_cast<u32>(sizeof(T)), &data); | ||
| 1084 | } | ||
| 1085 | |||
| 982 | void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept { | 1086 | void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept { |
| 983 | dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); | 1087 | dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); |
| 984 | } | 1088 | } |
| @@ -1088,6 +1192,20 @@ public: | |||
| 1088 | counter_buffers, counter_buffer_offsets); | 1192 | counter_buffers, counter_buffer_offsets); |
| 1089 | } | 1193 | } |
| 1090 | 1194 | ||
| 1195 | void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept { | ||
| 1196 | const VkDebugUtilsLabelEXT label_info{ | ||
| 1197 | .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, | ||
| 1198 | .pNext = nullptr, | ||
| 1199 | .pLabelName = label, | ||
| 1200 | .color{color[0], color[1], color[2], color[3]}, | ||
| 1201 | }; | ||
| 1202 | dld->vkCmdBeginDebugUtilsLabelEXT(handle, &label_info); | ||
| 1203 | } | ||
| 1204 | |||
| 1205 | void EndDebugUtilsLabelEXT() const noexcept { | ||
| 1206 | dld->vkCmdEndDebugUtilsLabelEXT(handle); | ||
| 1207 | } | ||
| 1208 | |||
| 1091 | private: | 1209 | private: |
| 1092 | VkCommandBuffer handle; | 1210 | VkCommandBuffer handle; |
| 1093 | const DeviceDispatch* dld; | 1211 | const DeviceDispatch* dld; |
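A short sketch exercising the conveniences this header gains — per-object debug names, scoped debug labels, the single-barrier PipelineBarrier overloads, and the typed PushConstants that derives offset and size from the struct. Everything named below (RecordDebugPass, the PushData layout, the barrier contents) is illustrative, not code from the change:

```cpp
#include <array>
#include <type_traits>
#include "video_core/vulkan_common/vulkan_wrapper.h"

struct PushData {
    float scale[2];
    float offset[2];
};
static_assert(std::is_trivially_copyable_v<PushData>);

void RecordDebugPass(const Vulkan::vk::CommandBuffer& cmdbuf, const Vulkan::vk::Image& image,
                     VkPipelineLayout layout, const VkImageMemoryBarrier& barrier) {
    image.SetObjectNameEXT("blit source"); // visible in RenderDoc and validation output

    std::array<float, 4> color{0.5f, 0.0f, 1.0f, 1.0f};
    cmdbuf.BeginDebugUtilsLabelEXT("blit pass", color);

    // New overload: one barrier no longer needs wrapping in a Span of one.
    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
                           VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, barrier);

    // Typed overload: offset 0 and size sizeof(PushData) are computed for us.
    const PushData data{{1.0f, 1.0f}, {0.0f, 0.0f}};
    cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, data);

    cmdbuf.EndDebugUtilsLabelEXT();
}
```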
diff --git a/src/yuzu/applets/controller.cpp b/src/yuzu/applets/controller.cpp index a15e8ca2a..c680fd2c2 100644 --- a/src/yuzu/applets/controller.cpp +++ b/src/yuzu/applets/controller.cpp | |||
| @@ -535,7 +535,7 @@ void QtControllerSelectorDialog::UpdateControllerState(std::size_t player_index) | |||
| 535 | // This emulates a delay between disconnecting and reconnecting controllers as some games | 535 | // This emulates a delay between disconnecting and reconnecting controllers as some games |
| 536 | // do not respond to a change in controller type if it was instantaneous. | 536 | // do not respond to a change in controller type if it was instantaneous. |
| 537 | using namespace std::chrono_literals; | 537 | using namespace std::chrono_literals; |
| 538 | std::this_thread::sleep_for(20ms); | 538 | std::this_thread::sleep_for(60ms); |
| 539 | 539 | ||
| 540 | UpdateController(controller_type, player_index, player_connected); | 540 | UpdateController(controller_type, player_index, player_connected); |
| 541 | } | 541 | } |
diff --git a/src/yuzu/applets/error.cpp b/src/yuzu/applets/error.cpp index 53a993cf6..8ee03ddb3 100644 --- a/src/yuzu/applets/error.cpp +++ b/src/yuzu/applets/error.cpp | |||
| @@ -19,7 +19,7 @@ QtErrorDisplay::~QtErrorDisplay() = default; | |||
| 19 | void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished) const { | 19 | void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished) const { |
| 20 | callback = std::move(finished); | 20 | callback = std::move(finished); |
| 21 | emit MainWindowDisplayError( | 21 | emit MainWindowDisplayError( |
| 22 | tr("An error has occured.\nPlease try again or contact the developer of the " | 22 | tr("An error has occurred.\nPlease try again or contact the developer of the " |
| 23 | "software.\n\nError Code: %1-%2 (0x%3)") | 23 | "software.\n\nError Code: %1-%2 (0x%3)") |
| 24 | .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) | 24 | .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) |
| 25 | .arg(error.description, 4, 10, QChar::fromLatin1('0')) | 25 | .arg(error.description, 4, 10, QChar::fromLatin1('0')) |
| @@ -32,7 +32,7 @@ void QtErrorDisplay::ShowErrorWithTimestamp(ResultCode error, std::chrono::secon | |||
| 32 | 32 | ||
| 33 | const QDateTime date_time = QDateTime::fromSecsSinceEpoch(time.count()); | 33 | const QDateTime date_time = QDateTime::fromSecsSinceEpoch(time.count()); |
| 34 | emit MainWindowDisplayError( | 34 | emit MainWindowDisplayError( |
| 35 | tr("An error occured on %1 at %2.\nPlease try again or contact the " | 35 | tr("An error occurred on %1 at %2.\nPlease try again or contact the " |
| 36 | "developer of the software.\n\nError Code: %3-%4 (0x%5)") | 36 | "developer of the software.\n\nError Code: %3-%4 (0x%5)") |
| 37 | .arg(date_time.toString(QStringLiteral("dddd, MMMM d, yyyy"))) | 37 | .arg(date_time.toString(QStringLiteral("dddd, MMMM d, yyyy"))) |
| 38 | .arg(date_time.toString(QStringLiteral("h:mm:ss A"))) | 38 | .arg(date_time.toString(QStringLiteral("h:mm:ss A"))) |
| @@ -46,7 +46,7 @@ void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_te | |||
| 46 | std::function<void()> finished) const { | 46 | std::function<void()> finished) const { |
| 47 | callback = std::move(finished); | 47 | callback = std::move(finished); |
| 48 | emit MainWindowDisplayError( | 48 | emit MainWindowDisplayError( |
| 49 | tr("An error has occured.\nError Code: %1-%2 (0x%3)\n\n%4\n\n%5") | 49 | tr("An error has occurred.\nError Code: %1-%2 (0x%3)\n\n%4\n\n%5") |
| 50 | .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) | 50 | .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) |
| 51 | .arg(error.description, 4, 10, QChar::fromLatin1('0')) | 51 | .arg(error.description, 4, 10, QChar::fromLatin1('0')) |
| 52 | .arg(error.raw, 8, 16, QChar::fromLatin1('0')) | 52 | .arg(error.raw, 8, 16, QChar::fromLatin1('0')) |
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index e124836b5..85ee2577d 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp | |||
| @@ -397,7 +397,7 @@ void GRenderWindow::mousePressEvent(QMouseEvent* event) { | |||
| 397 | this->TouchPressed(x, y); | 397 | this->TouchPressed(x, y); |
| 398 | } | 398 | } |
| 399 | 399 | ||
| 400 | QWidget::mousePressEvent(event); | 400 | emit MouseActivity(); |
| 401 | } | 401 | } |
| 402 | 402 | ||
| 403 | void GRenderWindow::mouseMoveEvent(QMouseEvent* event) { | 403 | void GRenderWindow::mouseMoveEvent(QMouseEvent* event) { |
| @@ -411,7 +411,7 @@ void GRenderWindow::mouseMoveEvent(QMouseEvent* event) { | |||
| 411 | input_subsystem->GetMouse()->MouseMove(x, y); | 411 | input_subsystem->GetMouse()->MouseMove(x, y); |
| 412 | this->TouchMoved(x, y); | 412 | this->TouchMoved(x, y); |
| 413 | 413 | ||
| 414 | QWidget::mouseMoveEvent(event); | 414 | emit MouseActivity(); |
| 415 | } | 415 | } |
| 416 | 416 | ||
| 417 | void GRenderWindow::mouseReleaseEvent(QMouseEvent* event) { | 417 | void GRenderWindow::mouseReleaseEvent(QMouseEvent* event) { |
| @@ -688,3 +688,10 @@ void GRenderWindow::showEvent(QShowEvent* event) { | |||
| 688 | connect(windowHandle(), &QWindow::screenChanged, this, &GRenderWindow::OnFramebufferSizeChanged, | 688 | connect(windowHandle(), &QWindow::screenChanged, this, &GRenderWindow::OnFramebufferSizeChanged, |
| 689 | Qt::UniqueConnection); | 689 | Qt::UniqueConnection); |
| 690 | } | 690 | } |
| 691 | |||
| 692 | bool GRenderWindow::eventFilter(QObject* object, QEvent* event) { | ||
| 693 | if (event->type() == QEvent::HoverMove) { | ||
| 694 | emit MouseActivity(); | ||
| 695 | } | ||
| 696 | return false; | ||
| 697 | } | ||
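Two Qt details are worth spelling out here: returning false from eventFilter means the HoverMove is observed but never consumed, and an event filter does nothing until it is installed on a watched object. The installation (and the Qt::WA_Hover attribute that makes HoverMove fire at all) is not visible in this hunk; a hypothetical, self-contained sketch of the pattern:

```cpp
#include <QEvent>
#include <QWidget>

// Hypothetical stand-in for GRenderWindow's setup; names are illustrative.
class HoverWatcher : public QWidget {
    Q_OBJECT
public:
    HoverWatcher() {
        setAttribute(Qt::WA_Hover); // without this, Qt never synthesizes HoverMove
        installEventFilter(this);   // watch our own events via eventFilter()
    }

signals:
    void MouseActivity();

protected:
    bool eventFilter(QObject* object, QEvent* event) override {
        if (event->type() == QEvent::HoverMove) {
            emit MouseActivity();
        }
        return false; // never consume; the normal event handlers still run
    }
};
```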
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index ebe5cb965..339095509 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h | |||
| @@ -184,6 +184,7 @@ signals: | |||
| 184 | void Closed(); | 184 | void Closed(); |
| 185 | void FirstFrameDisplayed(); | 185 | void FirstFrameDisplayed(); |
| 186 | void ExecuteProgramSignal(std::size_t program_index); | 186 | void ExecuteProgramSignal(std::size_t program_index); |
| 187 | void MouseActivity(); | ||
| 187 | 188 | ||
| 188 | private: | 189 | private: |
| 189 | void TouchBeginEvent(const QTouchEvent* event); | 190 | void TouchBeginEvent(const QTouchEvent* event); |
| @@ -216,4 +217,5 @@ private: | |||
| 216 | 217 | ||
| 217 | protected: | 218 | protected: |
| 218 | void showEvent(QShowEvent* event) override; | 219 | void showEvent(QShowEvent* event) override; |
| 220 | bool eventFilter(QObject* object, QEvent* event) override; | ||
| 219 | }; | 221 | }; |
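On the consuming side, the new MouseActivity signal is presumably wired to cursor handling in the main window; that connection is not part of these hunks. A hypothetical consumer — the GMainWindow internals, the timer, and OnMouseActivity are illustrative names, not code from this change:

```cpp
#include <QMainWindow>
#include <QTimer>
#include "yuzu/bootmanager.h"

class GMainWindow : public QMainWindow {
    Q_OBJECT
public:
    explicit GMainWindow(GRenderWindow* render_window) {
        connect(render_window, &GRenderWindow::MouseActivity, this,
                &GMainWindow::OnMouseActivity);
        mouse_hide_timer.setInterval(2500); // hypothetical auto-hide delay
        connect(&mouse_hide_timer, &QTimer::timeout, this,
                [this] { setCursor(Qt::BlankCursor); });
    }

private slots:
    void OnMouseActivity() {
        unsetCursor();            // reveal the cursor on any mouse movement
        mouse_hide_timer.start(); // re-arm the auto-hide countdown
    }

private:
    QTimer mouse_hide_timer;
};
```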
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp index 649912557..a470056ef 100644 --- a/src/yuzu/compatdb.cpp +++ b/src/yuzu/compatdb.cpp | |||
| @@ -72,7 +72,7 @@ void CompatDB::Submit() { | |||
| 72 | void CompatDB::OnTestcaseSubmitted() { | 72 | void CompatDB::OnTestcaseSubmitted() { |
| 73 | if (!testcase_watcher.result()) { | 73 | if (!testcase_watcher.result()) { |
| 74 | QMessageBox::critical(this, tr("Communication error"), | 74 | QMessageBox::critical(this, tr("Communication error"), |
| 75 | tr("An error occured while sending the Testcase")); | 75 | tr("An error occurred while sending the Testcase")); |
| 76 | button(NextButton)->setEnabled(true); | 76 | button(NextButton)->setEnabled(true); |
| 77 | button(NextButton)->setText(tr("Next")); | 77 | button(NextButton)->setText(tr("Next")); |
| 78 | button(CancelButton)->setVisible(true); | 78 | button(CancelButton)->setVisible(true); |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 9fb254986..cda448718 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -514,7 +514,7 @@ void Config::ReadControlValues() { | |||
| 514 | Settings::values.emulate_analog_keyboard = | 514 | Settings::values.emulate_analog_keyboard = |
| 515 | ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool(); | 515 | ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool(); |
| 516 | 516 | ||
| 517 | ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), false); | 517 | ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), true); |
| 518 | ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"), | 518 | ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"), |
| 519 | true); | 519 | true); |
| 520 | ReadSettingGlobal(Settings::values.enable_accurate_vibrations, | 520 | ReadSettingGlobal(Settings::values.enable_accurate_vibrations, |
| @@ -764,6 +764,8 @@ void Config::ReadCpuValues() { | |||
| 764 | ReadSetting(QStringLiteral("cpuopt_unsafe_unfuse_fma"), true).toBool(); | 764 | ReadSetting(QStringLiteral("cpuopt_unsafe_unfuse_fma"), true).toBool(); |
| 765 | Settings::values.cpuopt_unsafe_reduce_fp_error = | 765 | Settings::values.cpuopt_unsafe_reduce_fp_error = |
| 766 | ReadSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true).toBool(); | 766 | ReadSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true).toBool(); |
| 767 | Settings::values.cpuopt_unsafe_inaccurate_nan = | ||
| 768 | ReadSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true).toBool(); | ||
| 767 | } | 769 | } |
| 768 | 770 | ||
| 769 | qt_config->endGroup(); | 771 | qt_config->endGroup(); |
| @@ -1174,7 +1176,7 @@ void Config::SaveControlValues() { | |||
| 1174 | SaveTouchscreenValues(); | 1176 | SaveTouchscreenValues(); |
| 1175 | SaveMotionTouchValues(); | 1177 | SaveMotionTouchValues(); |
| 1176 | 1178 | ||
| 1177 | WriteSettingGlobal(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, false); | 1179 | WriteSettingGlobal(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, true); |
| 1178 | WriteSettingGlobal(QStringLiteral("vibration_enabled"), Settings::values.vibration_enabled, | 1180 | WriteSettingGlobal(QStringLiteral("vibration_enabled"), Settings::values.vibration_enabled, |
| 1179 | true); | 1181 | true); |
| 1180 | WriteSettingGlobal(QStringLiteral("enable_accurate_vibrations"), | 1182 | WriteSettingGlobal(QStringLiteral("enable_accurate_vibrations"), |
| @@ -1327,6 +1329,8 @@ void Config::SaveCpuValues() { | |||
| 1327 | Settings::values.cpuopt_unsafe_unfuse_fma, true); | 1329 | Settings::values.cpuopt_unsafe_unfuse_fma, true); |
| 1328 | WriteSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), | 1330 | WriteSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), |
| 1329 | Settings::values.cpuopt_unsafe_reduce_fp_error, true); | 1331 | Settings::values.cpuopt_unsafe_reduce_fp_error, true); |
| 1332 | WriteSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), | ||
| 1333 | Settings::values.cpuopt_unsafe_inaccurate_nan, true); | ||
| 1330 | } | 1334 | } |
| 1331 | 1335 | ||
| 1332 | qt_config->endGroup(); | 1336 | qt_config->endGroup(); |
| @@ -1589,14 +1593,12 @@ void Config::WriteSettingGlobal(const QString& name, const QVariant& value, bool | |||
| 1589 | 1593 | ||
| 1590 | void Config::Reload() { | 1594 | void Config::Reload() { |
| 1591 | ReadValues(); | 1595 | ReadValues(); |
| 1592 | Settings::Sanitize(); | ||
| 1593 | // To apply default value changes | 1596 | // To apply default value changes |
| 1594 | SaveValues(); | 1597 | SaveValues(); |
| 1595 | Settings::Apply(Core::System::GetInstance()); | 1598 | Settings::Apply(Core::System::GetInstance()); |
| 1596 | } | 1599 | } |
| 1597 | 1600 | ||
| 1598 | void Config::Save() { | 1601 | void Config::Save() { |
| 1599 | Settings::Sanitize(); | ||
| 1600 | SaveValues(); | 1602 | SaveValues(); |
| 1601 | } | 1603 | } |
| 1602 | 1604 | ||
diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp index 37fcd6adc..d055cbd60 100644 --- a/src/yuzu/configuration/configure_cpu.cpp +++ b/src/yuzu/configuration/configure_cpu.cpp | |||
| @@ -36,6 +36,8 @@ void ConfigureCpu::SetConfiguration() { | |||
| 36 | ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma); | 36 | ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma); |
| 37 | ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock); | 37 | ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock); |
| 38 | ui->cpuopt_unsafe_reduce_fp_error->setChecked(Settings::values.cpuopt_unsafe_reduce_fp_error); | 38 | ui->cpuopt_unsafe_reduce_fp_error->setChecked(Settings::values.cpuopt_unsafe_reduce_fp_error); |
| 39 | ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock); | ||
| 40 | ui->cpuopt_unsafe_inaccurate_nan->setChecked(Settings::values.cpuopt_unsafe_inaccurate_nan); | ||
| 39 | } | 41 | } |
| 40 | 42 | ||
| 41 | void ConfigureCpu::AccuracyUpdated(int index) { | 43 | void ConfigureCpu::AccuracyUpdated(int index) { |
| @@ -61,6 +63,7 @@ void ConfigureCpu::ApplyConfiguration() { | |||
| 61 | static_cast<Settings::CPUAccuracy>(ui->accuracy->currentIndex()); | 63 | static_cast<Settings::CPUAccuracy>(ui->accuracy->currentIndex()); |
| 62 | Settings::values.cpuopt_unsafe_unfuse_fma = ui->cpuopt_unsafe_unfuse_fma->isChecked(); | 64 | Settings::values.cpuopt_unsafe_unfuse_fma = ui->cpuopt_unsafe_unfuse_fma->isChecked(); |
| 63 | Settings::values.cpuopt_unsafe_reduce_fp_error = ui->cpuopt_unsafe_reduce_fp_error->isChecked(); | 65 | Settings::values.cpuopt_unsafe_reduce_fp_error = ui->cpuopt_unsafe_reduce_fp_error->isChecked(); |
| 66 | Settings::values.cpuopt_unsafe_inaccurate_nan = ui->cpuopt_unsafe_inaccurate_nan->isChecked(); | ||
| 64 | } | 67 | } |
| 65 | 68 | ||
| 66 | void ConfigureCpu::changeEvent(QEvent* event) { | 69 | void ConfigureCpu::changeEvent(QEvent* event) { |
diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui index ebdd2e6e9..bcd0962e9 100644 --- a/src/yuzu/configuration/configure_cpu.ui +++ b/src/yuzu/configuration/configure_cpu.ui | |||
| @@ -109,6 +109,18 @@ | |||
| 109 | </property> | 109 | </property> |
| 110 | </widget> | 110 | </widget> |
| 111 | </item> | 111 | </item> |
| 112 | <item> | ||
| 113 | <widget class="QCheckBox" name="cpuopt_unsafe_inaccurate_nan"> | ||
| 114 | <property name="text"> | ||
| 115 | <string>Inaccurate NaN handling</string> | ||
| 116 | </property> | ||
| 117 | <property name="toolTip"> | ||
| 118 | <string> | ||
| 119 | <div>This option improves speed by removing NaN checking. Please note this also reduces accuracy of certain floating-point instructions.</div> | ||
| 120 | </string> | ||
| 121 | </property> | ||
| 122 | </widget> | ||
| 123 | </item> | ||
| 112 | </layout> | 124 | </layout> |
| 113 | </widget> | 125 | </widget> |
| 114 | </item> | 126 | </item> |
diff --git a/src/yuzu/configuration/configure_input.cpp b/src/yuzu/configuration/configure_input.cpp index d9009091b..567a36d9b 100644 --- a/src/yuzu/configuration/configure_input.cpp +++ b/src/yuzu/configuration/configure_input.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <memory> | 6 | #include <memory> |
| 7 | #include <thread> | ||
| 7 | 8 | ||
| 8 | #include <QSignalBlocker> | 9 | #include <QSignalBlocker> |
| 9 | #include <QTimer> | 10 | #include <QTimer> |
| @@ -181,8 +182,18 @@ QList<QWidget*> ConfigureInput::GetSubTabs() const { | |||
| 181 | } | 182 | } |
| 182 | 183 | ||
| 183 | void ConfigureInput::ApplyConfiguration() { | 184 | void ConfigureInput::ApplyConfiguration() { |
| 184 | for (auto controller : player_controllers) { | 185 | for (auto* controller : player_controllers) { |
| 185 | controller->ApplyConfiguration(); | 186 | controller->ApplyConfiguration(); |
| 187 | controller->TryDisconnectSelectedController(); | ||
| 188 | } | ||
| 189 | |||
| 190 | // This emulates a delay between disconnecting and reconnecting controllers as some games | ||
| 191 | // do not respond to a change in controller type if it was instantaneous. | ||
| 192 | using namespace std::chrono_literals; | ||
| 193 | std::this_thread::sleep_for(60ms); | ||
| 194 | |||
| 195 | for (auto* controller : player_controllers) { | ||
| 196 | controller->TryConnectSelectedController(); | ||
| 186 | } | 197 | } |
| 187 | 198 | ||
| 188 | advanced->ApplyConfiguration(); | 199 | advanced->ApplyConfiguration(); |
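The two-pass loop above tears every controller down, waits, then reconnects, because some titles ignore a controller-type change that happens within a single frame. Stripped of the Qt plumbing, the pattern looks like this (Controller is a hypothetical stand-in for ConfigureInputPlayer):

    #include <chrono>
    #include <thread>
    #include <vector>

    struct Controller {       // hypothetical stand-in for ConfigureInputPlayer
        void Disconnect() {}
        void Connect() {}
    };

    void ReapplyControllers(std::vector<Controller*>& controllers) {
        for (auto* c : controllers) {
            c->Disconnect();  // phase 1: detach everything first
        }
        // Let the guest observe the disconnect before the new configuration
        // appears; mirrors the 60ms pause in the hunk above.
        std::this_thread::sleep_for(std::chrono::milliseconds(60));
        for (auto* c : controllers) {
            c->Connect();     // phase 2: reattach with the new settings
        }
    }

Sleeping once for all players, rather than once per player as the removed 20ms pause did, keeps the total delay constant instead of multiplying it by the player count.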
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index f9915fb7a..46ea026e4 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp | |||
| @@ -4,7 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <memory> | 6 | #include <memory> |
| 7 | #include <thread> | ||
| 8 | #include <utility> | 7 | #include <utility> |
| 9 | #include <QGridLayout> | 8 | #include <QGridLayout> |
| 10 | #include <QInputDialog> | 9 | #include <QInputDialog> |
| @@ -173,61 +172,31 @@ QString AnalogToText(const Common::ParamPackage& param, const std::string& dir) | |||
| 173 | return ButtonToText(Common::ParamPackage{param.Get(dir, "")}); | 172 | return ButtonToText(Common::ParamPackage{param.Get(dir, "")}); |
| 174 | } | 173 | } |
| 175 | 174 | ||
| 176 | if (param.Get("engine", "") == "sdl") { | 175 | const auto engine_str = param.Get("engine", ""); |
| 177 | if (dir == "modifier") { | 176 | const QString axis_x_str = QString::fromStdString(param.Get("axis_x", "")); |
| 178 | return QObject::tr("[unused]"); | 177 | const QString axis_y_str = QString::fromStdString(param.Get("axis_y", "")); |
| 179 | } | 178 | const bool invert_x = param.Get("invert_x", "+") == "-"; |
| 180 | 179 | const bool invert_y = param.Get("invert_y", "+") == "-"; | |
| 181 | if (dir == "left" || dir == "right") { | 180 | if (engine_str == "sdl" || engine_str == "gcpad" || engine_str == "mouse") { |
| 182 | const QString axis_x_str = QString::fromStdString(param.Get("axis_x", "")); | ||
| 183 | |||
| 184 | return QObject::tr("Axis %1").arg(axis_x_str); | ||
| 185 | } | ||
| 186 | |||
| 187 | if (dir == "up" || dir == "down") { | ||
| 188 | const QString axis_y_str = QString::fromStdString(param.Get("axis_y", "")); | ||
| 189 | |||
| 190 | return QObject::tr("Axis %1").arg(axis_y_str); | ||
| 191 | } | ||
| 192 | |||
| 193 | return {}; | ||
| 194 | } | ||
| 195 | |||
| 196 | if (param.Get("engine", "") == "gcpad") { | ||
| 197 | if (dir == "modifier") { | 181 | if (dir == "modifier") { |
| 198 | return QObject::tr("[unused]"); | 182 | return QObject::tr("[unused]"); |
| 199 | } | 183 | } |
| 200 | 184 | ||
| 201 | if (dir == "left" || dir == "right") { | 185 | if (dir == "left") { |
| 202 | const QString axis_x_str = QString::fromStdString(param.Get("axis_x", "")); | 186 | const QString invert_x_str = QString::fromStdString(invert_x ? "+" : "-"); |
| 203 | 187 | return QObject::tr("Axis %1%2").arg(axis_x_str, invert_x_str); | |
| 204 | return QObject::tr("GC Axis %1").arg(axis_x_str); | ||
| 205 | } | 188 | } |
| 206 | 189 | if (dir == "right") { | |
| 207 | if (dir == "up" || dir == "down") { | 190 | const QString invert_x_str = QString::fromStdString(invert_x ? "-" : "+"); |
| 208 | const QString axis_y_str = QString::fromStdString(param.Get("axis_y", "")); | 191 | return QObject::tr("Axis %1%2").arg(axis_x_str, invert_x_str); |
| 209 | |||
| 210 | return QObject::tr("GC Axis %1").arg(axis_y_str); | ||
| 211 | } | ||
| 212 | |||
| 213 | return {}; | ||
| 214 | } | ||
| 215 | |||
| 216 | if (param.Get("engine", "") == "mouse") { | ||
| 217 | if (dir == "modifier") { | ||
| 218 | return QObject::tr("[unused]"); | ||
| 219 | } | 192 | } |
| 220 | 193 | if (dir == "up") { | |
| 221 | if (dir == "left" || dir == "right") { | 194 | const QString invert_y_str = QString::fromStdString(invert_y ? "-" : "+"); |
| 222 | const QString axis_x_str = QString::fromStdString(param.Get("axis_x", "")); | 195 | return QObject::tr("Axis %1%2").arg(axis_y_str, invert_y_str); |
| 223 | |||
| 224 | return QObject::tr("Mouse %1").arg(axis_x_str); | ||
| 225 | } | 196 | } |
| 226 | 197 | if (dir == "down") { | |
| 227 | if (dir == "up" || dir == "down") { | 198 | const QString invert_y_str = QString::fromStdString(invert_y ? "+" : "-"); |
| 228 | const QString axis_y_str = QString::fromStdString(param.Get("axis_y", "")); | 199 | return QObject::tr("Axis %1%2").arg(axis_y_str, invert_y_str); |
| 229 | |||
| 230 | return QObject::tr("Mouse %1").arg(axis_y_str); | ||
| 231 | } | 200 | } |
| 232 | 201 | ||
| 233 | return {}; | 202 | return {}; |
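After the consolidation above, all three engines (sdl, gcpad, mouse) share one label format: the axis number followed by a sign, where inversion flips which direction carries "+". The sign logic reduces to a single comparison; a sketch, with positive_dir true for the right/up directions as in the code above:

    #include <string>

    // Returns e.g. "Axis 2+" for the right direction of a normal X axis and
    // "Axis 2-" for the same direction once invert_x is set to "-".
    std::string AxisLabel(const std::string& axis, bool inverted, bool positive_dir) {
        const bool plus = positive_dir != inverted;  // inversion flips the sign
        return "Axis " + axis + (plus ? "+" : "-");
    }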
| @@ -396,6 +365,25 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i | |||
| 396 | analogs_param[analog_id].Clear(); | 365 | analogs_param[analog_id].Clear(); |
| 397 | analog_map_buttons[analog_id][sub_button_id]->setText(tr("[not set]")); | 366 | analog_map_buttons[analog_id][sub_button_id]->setText(tr("[not set]")); |
| 398 | }); | 367 | }); |
| 368 | context_menu.addAction(tr("Invert axis"), [&] { | ||
| 369 | if (sub_button_id == 2 || sub_button_id == 3) { | ||
| 370 | const bool invert_value = | ||
| 371 | analogs_param[analog_id].Get("invert_x", "+") == "-"; | ||
| 372 | const std::string invert_str = invert_value ? "+" : "-"; | ||
| 373 | analogs_param[analog_id].Set("invert_x", invert_str); | ||
| 374 | } | ||
| 375 | if (sub_button_id == 0 || sub_button_id == 1) { | ||
| 376 | const bool invert_value = | ||
| 377 | analogs_param[analog_id].Get("invert_y", "+") == "-"; | ||
| 378 | const std::string invert_str = invert_value ? "+" : "-"; | ||
| 379 | analogs_param[analog_id].Set("invert_y", invert_str); | ||
| 380 | } | ||
| 381 | for (int sub_button_id = 0; sub_button_id < ANALOG_SUB_BUTTONS_NUM; | ||
| 382 | ++sub_button_id) { | ||
| 383 | analog_map_buttons[analog_id][sub_button_id]->setText(AnalogToText( | ||
| 384 | analogs_param[analog_id], analog_sub_buttons[sub_button_id])); | ||
| 385 | } | ||
| 386 | }); | ||
| 399 | context_menu.exec(analog_map_buttons[analog_id][sub_button_id]->mapToGlobal( | 387 | context_menu.exec(analog_map_buttons[analog_id][sub_button_id]->mapToGlobal( |
| 400 | menu_location)); | 388 | menu_location)); |
| 401 | }); | 389 | }); |
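The new context-menu action stores inversion in the param package as a literal "+" (normal) or "-" (inverted) and toggles it by inspecting the current value, defaulting to "+" when unset. With a plain std::map standing in for Common::ParamPackage, the toggle is:

    #include <map>
    #include <string>

    // Flip an "invert_x"/"invert_y" style flag; an absent key counts as "+".
    void ToggleInvert(std::map<std::string, std::string>& params, const std::string& key) {
        const auto it = params.find(key);
        const bool inverted = it != params.end() && it->second == "-";
        params[key] = inverted ? "+" : "-";
    }

Note that the label-refresh loop at the end of the lambda declares a counter that shadows the captured sub_button_id; it is harmless here because the loop only reads analog_id, but it is worth noticing when reading the hunk.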
| @@ -587,6 +575,10 @@ void ConfigureInputPlayer::ApplyConfiguration() { | |||
| 587 | 575 | ||
| 588 | std::transform(motions_param.begin(), motions_param.end(), motions.begin(), | 576 | std::transform(motions_param.begin(), motions_param.end(), motions.begin(), |
| 589 | [](const Common::ParamPackage& param) { return param.Serialize(); }); | 577 | [](const Common::ParamPackage& param) { return param.Serialize(); }); |
| 578 | } | ||
| 579 | |||
| 580 | void ConfigureInputPlayer::TryConnectSelectedController() { | ||
| 581 | auto& player = Settings::values.players.GetValue()[player_index]; | ||
| 590 | 582 | ||
| 591 | const auto controller_type = | 583 | const auto controller_type = |
| 592 | GetControllerTypeFromIndex(ui->comboControllerType->currentIndex()); | 584 | GetControllerTypeFromIndex(ui->comboControllerType->currentIndex()); |
| @@ -599,15 +591,12 @@ void ConfigureInputPlayer::ApplyConfiguration() { | |||
| 599 | return; | 591 | return; |
| 600 | } | 592 | } |
| 601 | 593 | ||
| 602 | // Disconnect the controller first. | ||
| 603 | UpdateController(controller_type, player_index, false); | ||
| 604 | |||
| 605 | player.controller_type = controller_type; | 594 | player.controller_type = controller_type; |
| 606 | player.connected = player_connected; | 595 | player.connected = player_connected; |
| 607 | 596 | ||
| 608 | ConfigureVibration::SetVibrationDevices(player_index); | 597 | ConfigureVibration::SetVibrationDevices(player_index); |
| 609 | 598 | ||
| 610 | // Handheld | 599 | // Connect/Disconnect Handheld depending on Player 1's controller configuration. |
| 611 | if (player_index == 0) { | 600 | if (player_index == 0) { |
| 612 | auto& handheld = Settings::values.players.GetValue()[HANDHELD_INDEX]; | 601 | auto& handheld = Settings::values.players.GetValue()[HANDHELD_INDEX]; |
| 613 | if (controller_type == Settings::ControllerType::Handheld) { | 602 | if (controller_type == Settings::ControllerType::Handheld) { |
| @@ -622,14 +611,26 @@ void ConfigureInputPlayer::ApplyConfiguration() { | |||
| 622 | return; | 611 | return; |
| 623 | } | 612 | } |
| 624 | 613 | ||
| 625 | // This emulates a delay between disconnecting and reconnecting controllers as some games | ||
| 626 | // do not respond to a change in controller type if it was instantaneous. | ||
| 627 | using namespace std::chrono_literals; | ||
| 628 | std::this_thread::sleep_for(20ms); | ||
| 629 | |||
| 630 | UpdateController(controller_type, player_index, player_connected); | 614 | UpdateController(controller_type, player_index, player_connected); |
| 631 | } | 615 | } |
| 632 | 616 | ||
| 617 | void ConfigureInputPlayer::TryDisconnectSelectedController() { | ||
| 618 | const auto& player = Settings::values.players.GetValue()[player_index]; | ||
| 619 | |||
| 620 | const auto controller_type = | ||
| 621 | GetControllerTypeFromIndex(ui->comboControllerType->currentIndex()); | ||
| 622 | const auto player_connected = ui->groupConnectedController->isChecked() && | ||
| 623 | controller_type != Settings::ControllerType::Handheld; | ||
| 624 | |||
| 625 | // Do not do anything if the controller configuration has not changed. | ||
| 626 | if (player.controller_type == controller_type && player.connected == player_connected) { | ||
| 627 | return; | ||
| 628 | } | ||
| 629 | |||
| 630 | // Disconnect the controller first. | ||
| 631 | UpdateController(controller_type, player_index, false); | ||
| 632 | } | ||
| 633 | |||
| 633 | void ConfigureInputPlayer::showEvent(QShowEvent* event) { | 634 | void ConfigureInputPlayer::showEvent(QShowEvent* event) { |
| 634 | if (bottom_row == nullptr) { | 635 | if (bottom_row == nullptr) { |
| 635 | return; | 636 | return; |
diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h index 9c30879a2..c4ae50de7 100644 --- a/src/yuzu/configuration/configure_input_player.h +++ b/src/yuzu/configuration/configure_input_player.h | |||
| @@ -54,6 +54,18 @@ public: | |||
| 54 | /// Save all button configurations to settings file. | 54 | /// Save all button configurations to settings file. |
| 55 | void ApplyConfiguration(); | 55 | void ApplyConfiguration(); |
| 56 | 56 | ||
| 57 | /** | ||
| 58 | * Attempts to connect the currently selected controller in the HID backend. | ||
| 59 | * This function will not do anything if the controller is not connected in the frontend. | ||
| 60 | */ | ||
| 61 | void TryConnectSelectedController(); | ||
| 62 | |||
| 63 | /** | ||
| 64 | * Attempts to disconnect the currently selected controller in the HID backend. | ||
| 65 | * This function will not do anything if the configuration has not changed. | ||
| 66 | */ | ||
| 67 | void TryDisconnectSelectedController(); | ||
| 68 | |||
| 57 | /// Set the connection state checkbox (used to sync state). | 69 | /// Set the connection state checkbox (used to sync state). |
| 58 | void ConnectPlayer(bool connected); | 70 | void ConnectPlayer(bool connected); |
| 59 | 71 | ||
diff --git a/src/yuzu/configuration/configure_motion_touch.cpp b/src/yuzu/configuration/configure_motion_touch.cpp index c2a7113da..eb8eacbf9 100644 --- a/src/yuzu/configuration/configure_motion_touch.cpp +++ b/src/yuzu/configuration/configure_motion_touch.cpp | |||
| @@ -51,6 +51,8 @@ CalibrationConfigurationDialog::CalibrationConfigurationDialog(QWidget* parent, | |||
| 51 | case CalibrationConfigurationJob::Status::Completed: | 51 | case CalibrationConfigurationJob::Status::Completed: |
| 52 | text = tr("Configuration completed!"); | 52 | text = tr("Configuration completed!"); |
| 53 | break; | 53 | break; |
| 54 | default: | ||
| 55 | break; | ||
| 54 | } | 56 | } |
| 55 | QMetaObject::invokeMethod(this, "UpdateLabelText", Q_ARG(QString, text)); | 57 | QMetaObject::invokeMethod(this, "UpdateLabelText", Q_ARG(QString, text)); |
| 56 | if (status == CalibrationConfigurationJob::Status::Completed) { | 58 | if (status == CalibrationConfigurationJob::Status::Completed) { |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index ebaccd2ef..2e74037d1 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -142,7 +142,7 @@ constexpr int default_mouse_timeout = 2500; | |||
| 142 | /** | 142 | /** |
| 143 | * "Callouts" are one-time instructional messages shown to the user. In the config settings, there | 143 | * "Callouts" are one-time instructional messages shown to the user. In the config settings, there |
| 144 | * is a bitfield "callout_flags" option, used to track if a message has already been shown to the | 144 | * is a bitfield "callout_flags" option, used to track if a message has already been shown to the |
| 145 | * user. This is 32-bits - if we have more than 32 callouts, we should retire and recyle old ones. | 145 | * user. This is 32-bits - if we have more than 32 callouts, we should retire and recycle old ones. |
| 146 | */ | 146 | */ |
| 147 | enum class CalloutFlag : uint32_t { | 147 | enum class CalloutFlag : uint32_t { |
| 148 | Telemetry = 0x1, | 148 | Telemetry = 0x1, |
| @@ -616,9 +616,8 @@ void GMainWindow::InitializeWidgets() { | |||
| 616 | if (emulation_running) { | 616 | if (emulation_running) { |
| 617 | return; | 617 | return; |
| 618 | } | 618 | } |
| 619 | const bool is_async = !Settings::values.use_asynchronous_gpu_emulation.GetValue() || | 619 | Settings::values.use_asynchronous_gpu_emulation.SetValue( |
| 620 | Settings::values.use_multi_core.GetValue(); | 620 | !Settings::values.use_asynchronous_gpu_emulation.GetValue()); |
| 621 | Settings::values.use_asynchronous_gpu_emulation.SetValue(is_async); | ||
| 622 | async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); | 621 | async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); |
| 623 | Settings::Apply(Core::System::GetInstance()); | 622 | Settings::Apply(Core::System::GetInstance()); |
| 624 | }); | 623 | }); |
| @@ -635,16 +634,13 @@ void GMainWindow::InitializeWidgets() { | |||
| 635 | return; | 634 | return; |
| 636 | } | 635 | } |
| 637 | Settings::values.use_multi_core.SetValue(!Settings::values.use_multi_core.GetValue()); | 636 | Settings::values.use_multi_core.SetValue(!Settings::values.use_multi_core.GetValue()); |
| 638 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue() || | ||
| 639 | Settings::values.use_multi_core.GetValue(); | ||
| 640 | Settings::values.use_asynchronous_gpu_emulation.SetValue(is_async); | ||
| 641 | async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); | ||
| 642 | multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); | 637 | multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); |
| 643 | Settings::Apply(Core::System::GetInstance()); | 638 | Settings::Apply(Core::System::GetInstance()); |
| 644 | }); | 639 | }); |
| 645 | multicore_status_button->setText(tr("MULTICORE")); | 640 | multicore_status_button->setText(tr("MULTICORE")); |
| 646 | multicore_status_button->setCheckable(true); | 641 | multicore_status_button->setCheckable(true); |
| 647 | multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); | 642 | multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); |
| 643 | |||
| 648 | statusBar()->insertPermanentWidget(0, multicore_status_button); | 644 | statusBar()->insertPermanentWidget(0, multicore_status_button); |
| 649 | statusBar()->insertPermanentWidget(0, async_status_button); | 645 | statusBar()->insertPermanentWidget(0, async_status_button); |
| 650 | 646 | ||
| @@ -1085,20 +1081,24 @@ bool GMainWindow::LoadROM(const QString& filename, std::size_t program_index) { | |||
| 1085 | break; | 1081 | break; |
| 1086 | 1082 | ||
| 1087 | default: | 1083 | default: |
| 1088 | if (static_cast<u32>(result) > | 1084 | if (result > Core::System::ResultStatus::ErrorLoader) { |
| 1089 | static_cast<u32>(Core::System::ResultStatus::ErrorLoader)) { | ||
| 1090 | const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader); | 1085 | const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader); |
| 1091 | const u16 error_id = static_cast<u16>(result) - loader_id; | 1086 | const u16 error_id = static_cast<u16>(result) - loader_id; |
| 1092 | const std::string error_code = fmt::format("({:04X}-{:04X})", loader_id, error_id); | 1087 | const std::string error_code = fmt::format("({:04X}-{:04X})", loader_id, error_id); |
| 1093 | LOG_CRITICAL(Frontend, "Failed to load ROM! {}", error_code); | 1088 | LOG_CRITICAL(Frontend, "Failed to load ROM! {}", error_code); |
| 1094 | QMessageBox::critical( | 1089 | |
| 1095 | this, | 1090 | const auto title = |
| 1096 | tr("Error while loading ROM! ").append(QString::fromStdString(error_code)), | 1091 | tr("Error while loading ROM! %1", "%1 signifies a numeric error code.") |
| 1097 | QString::fromStdString(fmt::format( | 1092 | .arg(QString::fromStdString(error_code)); |
| 1098 | "{}<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the " | 1093 | const auto description = |
| 1099 | "yuzu quickstart guide</a> to redump your files.<br>You can refer " | 1094 | tr("%1<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the " |
| 1100 | "to the yuzu wiki</a> or the yuzu Discord</a> for help.", | 1095 | "yuzu quickstart guide</a> to redump your files.<br>You can refer " |
| 1101 | static_cast<Loader::ResultStatus>(error_id)))); | 1096 | "to the yuzu wiki or the yuzu Discord for help.", |
| 1097 | "%1 signifies an error string.") | ||
| 1098 | .arg(QString::fromStdString( | ||
| 1099 | GetResultStatusString(static_cast<Loader::ResultStatus>(error_id)))); | ||
| 1100 | |||
| 1101 | QMessageBox::critical(this, title, description); | ||
| 1102 | } else { | 1102 | } else { |
| 1103 | QMessageBox::critical( | 1103 | QMessageBox::critical( |
| 1104 | this, tr("Error while loading ROM!"), | 1104 | this, tr("Error while loading ROM!"), |
| @@ -1170,6 +1170,7 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) { | |||
| 1170 | [this](std::size_t program_index) { render_window->ExecuteProgram(program_index); }); | 1170 | [this](std::size_t program_index) { render_window->ExecuteProgram(program_index); }); |
| 1171 | 1171 | ||
| 1172 | connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame); | 1172 | connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame); |
| 1173 | connect(render_window, &GRenderWindow::MouseActivity, this, &GMainWindow::OnMouseActivity); | ||
| 1173 | // BlockingQueuedConnection is important here, it makes sure we've finished refreshing our views | 1174 | // BlockingQueuedConnection is important here, it makes sure we've finished refreshing our views |
| 1174 | // before the CPU continues | 1175 | // before the CPU continues |
| 1175 | connect(emu_thread.get(), &EmuThread::DebugModeEntered, waitTreeWidget, | 1176 | connect(emu_thread.get(), &EmuThread::DebugModeEntered, waitTreeWidget, |
| @@ -1193,8 +1194,8 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) { | |||
| 1193 | 1194 | ||
| 1194 | if (UISettings::values.hide_mouse) { | 1195 | if (UISettings::values.hide_mouse) { |
| 1195 | mouse_hide_timer.start(); | 1196 | mouse_hide_timer.start(); |
| 1196 | setMouseTracking(true); | 1197 | render_window->installEventFilter(render_window); |
| 1197 | ui.centralwidget->setMouseTracking(true); | 1198 | render_window->setAttribute(Qt::WA_Hover, true); |
| 1198 | } | 1199 | } |
| 1199 | 1200 | ||
| 1200 | std::string title_name; | 1201 | std::string title_name; |
| @@ -1271,8 +1272,8 @@ void GMainWindow::ShutdownGame() { | |||
| 1271 | } | 1272 | } |
| 1272 | game_list->SetFilterFocus(); | 1273 | game_list->SetFilterFocus(); |
| 1273 | 1274 | ||
| 1274 | setMouseTracking(false); | 1275 | render_window->removeEventFilter(render_window); |
| 1275 | ui.centralwidget->setMouseTracking(false); | 1276 | render_window->setAttribute(Qt::WA_Hover, false); |
| 1276 | 1277 | ||
| 1277 | UpdateWindowTitle(); | 1278 | UpdateWindowTitle(); |
| 1278 | 1279 | ||
| @@ -2353,12 +2354,12 @@ void GMainWindow::OnConfigure() { | |||
| 2353 | config->Save(); | 2354 | config->Save(); |
| 2354 | 2355 | ||
| 2355 | if (UISettings::values.hide_mouse && emulation_running) { | 2356 | if (UISettings::values.hide_mouse && emulation_running) { |
| 2356 | setMouseTracking(true); | 2357 | render_window->installEventFilter(render_window); |
| 2357 | ui.centralwidget->setMouseTracking(true); | 2358 | render_window->setAttribute(Qt::WA_Hover, true); |
| 2358 | mouse_hide_timer.start(); | 2359 | mouse_hide_timer.start(); |
| 2359 | } else { | 2360 | } else { |
| 2360 | setMouseTracking(false); | 2361 | render_window->removeEventFilter(render_window); |
| 2361 | ui.centralwidget->setMouseTracking(false); | 2362 | render_window->setAttribute(Qt::WA_Hover, false); |
| 2362 | } | 2363 | } |
| 2363 | 2364 | ||
| 2364 | UpdateStatusButtons(); | 2365 | UpdateStatusButtons(); |
| @@ -2569,9 +2570,6 @@ void GMainWindow::UpdateStatusBar() { | |||
| 2569 | void GMainWindow::UpdateStatusButtons() { | 2570 | void GMainWindow::UpdateStatusButtons() { |
| 2570 | dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); | 2571 | dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); |
| 2571 | multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); | 2572 | multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); |
| 2572 | Settings::values.use_asynchronous_gpu_emulation.SetValue( | ||
| 2573 | Settings::values.use_asynchronous_gpu_emulation.GetValue() || | ||
| 2574 | Settings::values.use_multi_core.GetValue()); | ||
| 2575 | async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); | 2573 | async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); |
| 2576 | renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == | 2574 | renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == |
| 2577 | Settings::RendererBackend::Vulkan); | 2575 | Settings::RendererBackend::Vulkan); |
| @@ -2601,21 +2599,17 @@ void GMainWindow::HideMouseCursor() { | |||
| 2601 | ShowMouseCursor(); | 2599 | ShowMouseCursor(); |
| 2602 | return; | 2600 | return; |
| 2603 | } | 2601 | } |
| 2604 | setCursor(QCursor(Qt::BlankCursor)); | 2602 | render_window->setCursor(QCursor(Qt::BlankCursor)); |
| 2605 | } | 2603 | } |
| 2606 | 2604 | ||
| 2607 | void GMainWindow::ShowMouseCursor() { | 2605 | void GMainWindow::ShowMouseCursor() { |
| 2608 | unsetCursor(); | 2606 | render_window->unsetCursor(); |
| 2609 | if (emu_thread != nullptr && UISettings::values.hide_mouse) { | 2607 | if (emu_thread != nullptr && UISettings::values.hide_mouse) { |
| 2610 | mouse_hide_timer.start(); | 2608 | mouse_hide_timer.start(); |
| 2611 | } | 2609 | } |
| 2612 | } | 2610 | } |
| 2613 | 2611 | ||
| 2614 | void GMainWindow::mouseMoveEvent(QMouseEvent* event) { | 2612 | void GMainWindow::OnMouseActivity() { |
| 2615 | ShowMouseCursor(); | ||
| 2616 | } | ||
| 2617 | |||
| 2618 | void GMainWindow::mousePressEvent(QMouseEvent* event) { | ||
| 2619 | ShowMouseCursor(); | 2613 | ShowMouseCursor(); |
| 2620 | } | 2614 | } |
| 2621 | 2615 | ||
diff --git a/src/yuzu/main.h b/src/yuzu/main.h index ea6d2c30d..31788ea62 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h | |||
| @@ -248,6 +248,7 @@ private slots: | |||
| 248 | void OnCoreError(Core::System::ResultStatus, std::string); | 248 | void OnCoreError(Core::System::ResultStatus, std::string); |
| 249 | void OnReinitializeKeys(ReinitializeKeyBehavior behavior); | 249 | void OnReinitializeKeys(ReinitializeKeyBehavior behavior); |
| 250 | void OnLanguageChanged(const QString& locale); | 250 | void OnLanguageChanged(const QString& locale); |
| 251 | void OnMouseActivity(); | ||
| 251 | 252 | ||
| 252 | private: | 253 | private: |
| 253 | void RemoveBaseContent(u64 program_id, const QString& entry_type); | 254 | void RemoveBaseContent(u64 program_id, const QString& entry_type); |
| @@ -335,6 +336,4 @@ protected: | |||
| 335 | void dropEvent(QDropEvent* event) override; | 336 | void dropEvent(QDropEvent* event) override; |
| 336 | void dragEnterEvent(QDragEnterEvent* event) override; | 337 | void dragEnterEvent(QDragEnterEvent* event) override; |
| 337 | void dragMoveEvent(QDragMoveEvent* event) override; | 338 | void dragMoveEvent(QDragMoveEvent* event) override; |
| 338 | void mouseMoveEvent(QMouseEvent* event) override; | ||
| 339 | void mousePressEvent(QMouseEvent* event) override; | ||
| 340 | }; | 339 | }; |
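Replacing the mouseMoveEvent/mousePressEvent overrides, the main window now asks the render window to watch itself: installEventFilter(render_window) routes the widget's own events through its eventFilter(), and Qt::WA_Hover makes Qt deliver HoverMove events without enabling full mouse tracking. The filter then surfaces pointer activity as the MouseActivity signal consumed by GMainWindow::OnMouseActivity(). GRenderWindow's filter body is not part of this diff, so the following is only a sketch of the likely shape:

    #include <QEvent>
    #include <QWidget>

    class ActivityWatcher : public QWidget {  // hypothetical GRenderWindow analogue
        Q_OBJECT
    public:
        bool eventFilter(QObject* object, QEvent* event) override {
            switch (event->type()) {
            case QEvent::HoverMove:  // delivered because WA_Hover is set
            case QEvent::MouseButtonPress:
            case QEvent::MouseMove:
                emit MouseActivity();
                break;
            default:
                break;
            }
            return QWidget::eventFilter(object, event);  // never swallow events
        }
    signals:
        void MouseActivity();
    };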
diff --git a/src/yuzu/util/url_request_interceptor.cpp b/src/yuzu/util/url_request_interceptor.cpp index 2d491d8c0..b637e771e 100644 --- a/src/yuzu/util/url_request_interceptor.cpp +++ b/src/yuzu/util/url_request_interceptor.cpp | |||
| @@ -22,6 +22,8 @@ void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo& info) { | |||
| 22 | case QWebEngineUrlRequestInfo::ResourceTypeXhr: | 22 | case QWebEngineUrlRequestInfo::ResourceTypeXhr: |
| 23 | emit FrameChanged(); | 23 | emit FrameChanged(); |
| 24 | break; | 24 | break; |
| 25 | default: | ||
| 26 | break; | ||
| 25 | } | 27 | } |
| 26 | } | 28 | } |
| 27 | 29 | ||
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 38075c345..41ef6f6b8 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -344,7 +344,7 @@ void Config::ReadValues() { | |||
| 344 | 344 | ||
| 345 | // System | 345 | // System |
| 346 | Settings::values.use_docked_mode.SetValue( | 346 | Settings::values.use_docked_mode.SetValue( |
| 347 | sdl2_config->GetBoolean("System", "use_docked_mode", false)); | 347 | sdl2_config->GetBoolean("System", "use_docked_mode", true)); |
| 348 | 348 | ||
| 349 | Settings::values.current_user = std::clamp<int>( | 349 | Settings::values.current_user = std::clamp<int>( |
| 350 | sdl2_config->GetInteger("System", "current_user", 0), 0, Service::Account::MAX_USERS - 1); | 350 | sdl2_config->GetInteger("System", "current_user", 0), 0, Service::Account::MAX_USERS - 1); |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 2d4b98d9a..3ee0e037d 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -274,7 +274,7 @@ gamecard_path = | |||
| 274 | 274 | ||
| 275 | [System] | 275 | [System] |
| 276 | # Whether the system is docked | 276 | # Whether the system is docked |
| 277 | # 1: Yes, 0 (default): No | 277 | # 1 (default): Yes, 0: No |
| 278 | use_docked_mode = | 278 | use_docked_mode = |
| 279 | 279 | ||
| 280 | # Allow the use of NFC in games | 280 | # Allow the use of NFC in games |
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 2497c71ae..4faf62ede 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp | |||
| @@ -95,8 +95,6 @@ int main(int argc, char** argv) { | |||
| 95 | int option_index = 0; | 95 | int option_index = 0; |
| 96 | 96 | ||
| 97 | InitializeLogging(); | 97 | InitializeLogging(); |
| 98 | |||
| 99 | char* endarg; | ||
| 100 | #ifdef _WIN32 | 98 | #ifdef _WIN32 |
| 101 | int argc_w; | 99 | int argc_w; |
| 102 | auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w); | 100 | auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w); |
| @@ -202,7 +200,7 @@ int main(int argc, char** argv) { | |||
| 202 | const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader); | 200 | const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader); |
| 203 | const u16 error_id = static_cast<u16>(load_result) - loader_id; | 201 | const u16 error_id = static_cast<u16>(load_result) - loader_id; |
| 204 | LOG_CRITICAL(Frontend, | 202 | LOG_CRITICAL(Frontend, |
| 205 | "While attempting to load the ROM requested, an error occured. Please " | 203 | "While attempting to load the ROM requested, an error occurred. Please " |
| 206 | "refer to the yuzu wiki for more information or the yuzu discord for " | 204 | "refer to the yuzu wiki for more information or the yuzu discord for " |
| 207 | "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}", | 205 | "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}", |
| 208 | loader_id, error_id, static_cast<Loader::ResultStatus>(error_id)); | 206 | loader_id, error_id, static_cast<Loader::ResultStatus>(error_id)); |
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index 91684e96e..0aa143e1f 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp | |||
| @@ -86,7 +86,7 @@ void Config::ReadValues() { | |||
| 86 | Settings::values.touchscreen.diameter_y = 15; | 86 | Settings::values.touchscreen.diameter_y = 15; |
| 87 | 87 | ||
| 88 | Settings::values.use_docked_mode.SetValue( | 88 | Settings::values.use_docked_mode.SetValue( |
| 89 | sdl2_config->GetBoolean("Controls", "use_docked_mode", false)); | 89 | sdl2_config->GetBoolean("Controls", "use_docked_mode", true)); |
| 90 | 90 | ||
| 91 | // Data Storage | 91 | // Data Storage |
| 92 | Settings::values.use_virtual_sd = | 92 | Settings::values.use_virtual_sd = |
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h index 3eb64e9d7..779c3791b 100644 --- a/src/yuzu_tester/default_ini.h +++ b/src/yuzu_tester/default_ini.h | |||
| @@ -116,7 +116,7 @@ use_virtual_sd = | |||
| 116 | 116 | ||
| 117 | [System] | 117 | [System] |
| 118 | # Whether the system is docked | 118 | # Whether the system is docked |
| 119 | # 1: Yes, 0 (default): No | 119 | # 1 (default): Yes, 0: No |
| 120 | use_docked_mode = | 120 | use_docked_mode = |
| 121 | 121 | ||
| 122 | # Allow the use of NFC in games | 122 | # Allow the use of NFC in games |
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp index 6435ffabb..09cf2ad77 100644 --- a/src/yuzu_tester/yuzu.cpp +++ b/src/yuzu_tester/yuzu.cpp | |||
| @@ -242,7 +242,7 @@ int main(int argc, char** argv) { | |||
| 242 | const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader); | 242 | const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader); |
| 243 | const u16 error_id = static_cast<u16>(load_result) - loader_id; | 243 | const u16 error_id = static_cast<u16>(load_result) - loader_id; |
| 244 | LOG_CRITICAL(Frontend, | 244 | LOG_CRITICAL(Frontend, |
| 245 | "While attempting to load the ROM requested, an error occured. Please " | 245 | "While attempting to load the ROM requested, an error occurred. Please " |
| 246 | "refer to the yuzu wiki for more information or the yuzu discord for " | 246 | "refer to the yuzu wiki for more information or the yuzu discord for " |
| 247 | "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}", | 247 | "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}", |
| 248 | loader_id, error_id, static_cast<Loader::ResultStatus>(error_id)); | 248 | loader_id, error_id, static_cast<Loader::ResultStatus>(error_id)); |